elasticsearch 9.0.1__py3-none-any.whl → 9.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/_async/client/__init__.py +47 -203
- elasticsearch/_async/client/cat.py +594 -32
- elasticsearch/_async/client/cluster.py +14 -4
- elasticsearch/_async/client/eql.py +10 -2
- elasticsearch/_async/client/esql.py +17 -4
- elasticsearch/_async/client/indices.py +100 -47
- elasticsearch/_async/client/inference.py +110 -75
- elasticsearch/_async/client/ingest.py +0 -7
- elasticsearch/_async/client/license.py +4 -4
- elasticsearch/_async/client/ml.py +6 -17
- elasticsearch/_async/client/monitoring.py +1 -1
- elasticsearch/_async/client/rollup.py +1 -22
- elasticsearch/_async/client/security.py +11 -17
- elasticsearch/_async/client/snapshot.py +6 -0
- elasticsearch/_async/client/synonyms.py +1 -0
- elasticsearch/_async/client/watcher.py +4 -2
- elasticsearch/_sync/client/__init__.py +47 -203
- elasticsearch/_sync/client/cat.py +594 -32
- elasticsearch/_sync/client/cluster.py +14 -4
- elasticsearch/_sync/client/eql.py +10 -2
- elasticsearch/_sync/client/esql.py +17 -4
- elasticsearch/_sync/client/indices.py +100 -47
- elasticsearch/_sync/client/inference.py +110 -75
- elasticsearch/_sync/client/ingest.py +0 -7
- elasticsearch/_sync/client/license.py +4 -4
- elasticsearch/_sync/client/ml.py +6 -17
- elasticsearch/_sync/client/monitoring.py +1 -1
- elasticsearch/_sync/client/rollup.py +1 -22
- elasticsearch/_sync/client/security.py +11 -17
- elasticsearch/_sync/client/snapshot.py +6 -0
- elasticsearch/_sync/client/synonyms.py +1 -0
- elasticsearch/_sync/client/watcher.py +4 -2
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +1 -1
- elasticsearch/dsl/_sync/document.py +1 -1
- elasticsearch/dsl/document_base.py +176 -16
- elasticsearch/dsl/field.py +223 -38
- elasticsearch/dsl/query.py +49 -4
- elasticsearch/dsl/types.py +107 -16
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/esql/__init__.py +18 -0
- elasticsearch/esql/esql.py +1105 -0
- elasticsearch/esql/functions.py +1738 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.0.3.dist-info}/METADATA +1 -3
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.0.3.dist-info}/RECORD +50 -49
- elasticsearch-9.0.1.dist-info/licenses/LICENSE.txt +0 -175
- elasticsearch-9.0.1.dist-info/licenses/NOTICE.txt +0 -559
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.0.3.dist-info}/WHEEL +0 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.0.3.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.0.3.dist-info}/licenses/NOTICE +0 -0
@@ -366,26 +366,44 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

-            <p>Create an inference endpoint
-            When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint.</p>
             <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
             For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
             However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+            <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+            <ul>
+            <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+            <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Anthropic (<code>completion</code>)</li>
+            <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+            <li>ELSER (<code>sparse_embedding</code>)</li>
+            <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Hugging Face (<code>text_embedding</code>)</li>
+            <li>Mistral (<code>text_embedding</code>)</li>
+            <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+            <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+            <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+            </ul>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put>`_

         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +434,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
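The two hunks above add an optional `timeout` query parameter to `InferenceClient.put` and thread it into the request. A minimal usage sketch against the async client (endpoint name, service, and service settings are illustrative placeholders, not taken from this diff):

    import asyncio

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def main() -> None:
        # New in 9.0.3: wait up to 30 seconds for the endpoint to be created.
        resp = await client.inference.put(
            inference_id="my-embedding-endpoint",  # hypothetical endpoint name
            inference_config={
                "service": "elser",
                "service_settings": {"num_allocations": 1, "num_threads": 1},
            },
            timeout="30s",
        )
        print(resp)

    asyncio.run(main())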
@@ -451,6 +471,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -458,11 +479,6 @@ class InferenceClient(NamespacedClient):

             <p>Create an AlibabaCloud AI Search inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-alibabacloud>`_
@@ -476,6 +492,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +520,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
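The same four-hunk pattern repeats for every service-specific helper in this release: `timeout` is added to the signature, documented, and forwarded as a query parameter. A hedged sketch for `put_alibabacloud` (the service settings follow that service's documentation; all values are placeholders):

    # Sketch only: create an AlibabaCloud AI Search endpoint with the new timeout.
    resp = await client.inference.put_alibabacloud(
        task_type="text_embedding",
        alibabacloud_inference_id="my-alibaba-endpoint",  # hypothetical name
        service="alibabacloud-ai-search",
        service_settings={
            "api_key": "...",  # elided on purpose
            "host": "...",
            "service_id": "ops-text-embedding-001",
            "workspace": "default",
        },
        timeout="30s",  # added by this release
    )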
@@ -547,22 +567,18 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

             <p>Create an Amazon Bedrock inference endpoint.</p>
-            <p>
+            <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
             <blockquote>
             <p>info
             You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
             </blockquote>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-amazonbedrock>`_
@@ -576,6 +592,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +620,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
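The retained blockquote is worth underlining: Bedrock access and secret keys are write-only. They are supplied once at creation and cannot be read back or rotated in place. A hedged sketch (provider, model, region, and key values are placeholders):

    resp = await client.inference.put_amazonbedrock(
        task_type="text_embedding",
        amazonbedrock_inference_id="my-bedrock-endpoint",  # hypothetical name
        service="amazonbedrock",
        service_settings={
            # Supplied once; the get inference API will not return them.
            "access_key": "AKIA...",
            "secret_key": "...",
            "region": "us-east-1",
            "provider": "amazontitan",
            "model": "amazon.titan-embed-text-v2:0",
        },
        timeout="30s",
    )
    # To rotate keys, delete the endpoint and recreate it under the same name
    # with the updated pair, as the docstring above describes.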
@@ -647,6 +667,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -654,11 +675,6 @@ class InferenceClient(NamespacedClient):

             <p>Create an Anthropic inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-anthropic>`_
@@ -673,6 +689,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +717,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -744,6 +764,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -751,11 +772,6 @@ class InferenceClient(NamespacedClient):

             <p>Create an Azure AI studio inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-azureaistudio>`_
@@ -769,6 +785,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +813,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -840,6 +860,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -853,11 +874,6 @@ class InferenceClient(NamespacedClient):
             <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
             </ul>
             <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-azureopenai>`_
@@ -873,6 +889,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +917,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -944,6 +964,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -951,11 +972,6 @@ class InferenceClient(NamespacedClient):

             <p>Create a Cohere inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-cohere>`_
@@ -969,6 +985,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1011,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1040,6 +1060,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1074,6 +1095,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1100,6 +1123,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1139,6 +1164,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1171,6 +1197,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `elser` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1195,6 +1223,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
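The docstring paragraphs removed throughout this diff told users to verify model deployment through the get trained model statistics API before using an endpoint. For endpoints backed by built-in models (ELSER, E5) that check remains available outside the docstring; a sketch of it (the model ID is illustrative, and the response fields follow the trained-model stats API):

    # Confirm the backing model is fully allocated before sending traffic.
    stats = await client.ml.get_trained_models_stats(model_id=".elser_model_2")
    for model in stats["trained_model_stats"]:
        allocation = model.get("deployment_stats", {}).get("allocation_status", {})
        if allocation.get("state") == "fully_allocated" and (
            allocation.get("allocation_count") == allocation.get("target_allocation_count")
        ):
            print("deployment ready")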
@@ -1232,6 +1262,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1239,11 +1270,6 @@ class InferenceClient(NamespacedClient):

             <p>Create an Google AI Studio inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-googleaistudio>`_
@@ -1255,6 +1281,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googleaistudio` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1281,6 +1309,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1324,6 +1354,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1331,11 +1362,6 @@ class InferenceClient(NamespacedClient):

             <p>Create a Google Vertex AI inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-googlevertexai>`_
@@ -1349,6 +1375,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1403,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1414,6 +1444,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1434,11 +1465,6 @@ class InferenceClient(NamespacedClient):
             <li><code>multilingual-e5-base</code></li>
             <li><code>multilingual-e5-small</code></li>
             </ul>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-hugging-face>`_
@@ -1450,6 +1476,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1504,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1519,6 +1549,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1528,11 +1559,6 @@ class InferenceClient(NamespacedClient):
             <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
             <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
             To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-jinaai>`_
@@ -1546,6 +1572,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1598,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1609,6 +1639,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1616,11 +1647,6 @@ class InferenceClient(NamespacedClient):

             <p>Create a Mistral inference endpoint.</p>
             <p>Creates an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-mistral>`_
@@ -1633,6 +1659,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +1685,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1702,18 +1732,14 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

             <p>Create an OpenAI inference endpoint.</p>
-            <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-openai>`_
@@ -1729,6 +1755,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +1781,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
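Beyond the `timeout` plumbing, the `put_openai` docstring now advertises OpenAI-compatible APIs. Assuming the service's `url` setting accepts a compatible server's endpoint (this is an assumption; all values below are placeholders):

    resp = await client.inference.put_openai(
        task_type="completion",
        openai_inference_id="my-local-llm",  # hypothetical name
        service="openai",
        service_settings={
            "api_key": "unused-but-required",  # compatible servers often ignore it
            "model_id": "my-model",
            # Assumption: point `url` at an OpenAI-compatible completions endpoint.
            "url": "http://localhost:8000/v1/chat/completions",
        },
        timeout="30s",
    )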
@@ -1798,6 +1828,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1819,6 +1850,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +1876,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1881,6 +1916,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1890,11 +1926,6 @@ class InferenceClient(NamespacedClient):
             <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
             You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
             You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-put-watsonx>`_
@@ -1906,6 +1937,8 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +1963,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1970,7 +2005,7 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-            <p>Perform
+            <p>Perform reranking inference on the service</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-inference-inference>`_
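The last hunk only completes a docstring that was previously cut off mid-sentence. For context, a hedged sketch of calling the rerank API that docstring describes (endpoint name and response handling are illustrative):

    resp = await client.inference.rerank(
        inference_id="my-rerank-endpoint",  # hypothetical endpoint name
        query="Which document mentions timeouts?",
        input=[
            "Document one is about snapshots.",
            "Document two is about inference endpoint timeouts.",
        ],
    )
    # Assumption about the response shape: rerank results carry an index and a score.
    for entry in resp.get("rerank", []):
        print(entry["index"], entry["relevance_score"])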