elasticsearch 8.18.1__py3-none-any.whl → 8.19.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (101)
  1. elasticsearch/_async/client/__init__.py +56 -76
  2. elasticsearch/_async/client/async_search.py +5 -9
  3. elasticsearch/_async/client/autoscaling.py +4 -4
  4. elasticsearch/_async/client/cat.py +620 -65
  5. elasticsearch/_async/client/ccr.py +13 -13
  6. elasticsearch/_async/client/cluster.py +31 -22
  7. elasticsearch/_async/client/connector.py +30 -30
  8. elasticsearch/_async/client/dangling_indices.py +3 -3
  9. elasticsearch/_async/client/enrich.py +5 -5
  10. elasticsearch/_async/client/eql.py +13 -5
  11. elasticsearch/_async/client/esql.py +38 -9
  12. elasticsearch/_async/client/features.py +2 -2
  13. elasticsearch/_async/client/fleet.py +13 -13
  14. elasticsearch/_async/client/graph.py +1 -1
  15. elasticsearch/_async/client/ilm.py +11 -11
  16. elasticsearch/_async/client/indices.py +131 -82
  17. elasticsearch/_async/client/inference.py +411 -112
  18. elasticsearch/_async/client/ingest.py +9 -16
  19. elasticsearch/_async/client/license.py +10 -10
  20. elasticsearch/_async/client/logstash.py +3 -3
  21. elasticsearch/_async/client/migration.py +3 -3
  22. elasticsearch/_async/client/ml.py +76 -88
  23. elasticsearch/_async/client/nodes.py +9 -8
  24. elasticsearch/_async/client/query_rules.py +8 -8
  25. elasticsearch/_async/client/rollup.py +8 -8
  26. elasticsearch/_async/client/search_application.py +10 -10
  27. elasticsearch/_async/client/searchable_snapshots.py +4 -4
  28. elasticsearch/_async/client/security.py +72 -80
  29. elasticsearch/_async/client/shutdown.py +3 -3
  30. elasticsearch/_async/client/simulate.py +1 -1
  31. elasticsearch/_async/client/slm.py +9 -9
  32. elasticsearch/_async/client/snapshot.py +19 -13
  33. elasticsearch/_async/client/sql.py +6 -6
  34. elasticsearch/_async/client/ssl.py +1 -1
  35. elasticsearch/_async/client/synonyms.py +7 -7
  36. elasticsearch/_async/client/tasks.py +3 -3
  37. elasticsearch/_async/client/text_structure.py +4 -4
  38. elasticsearch/_async/client/transform.py +9 -9
  39. elasticsearch/_async/client/xpack.py +1 -1
  40. elasticsearch/_sync/client/__init__.py +56 -76
  41. elasticsearch/_sync/client/async_search.py +5 -9
  42. elasticsearch/_sync/client/autoscaling.py +4 -4
  43. elasticsearch/_sync/client/cat.py +620 -65
  44. elasticsearch/_sync/client/ccr.py +13 -13
  45. elasticsearch/_sync/client/cluster.py +31 -22
  46. elasticsearch/_sync/client/connector.py +30 -30
  47. elasticsearch/_sync/client/dangling_indices.py +3 -3
  48. elasticsearch/_sync/client/enrich.py +5 -5
  49. elasticsearch/_sync/client/eql.py +13 -5
  50. elasticsearch/_sync/client/esql.py +38 -9
  51. elasticsearch/_sync/client/features.py +2 -2
  52. elasticsearch/_sync/client/fleet.py +13 -13
  53. elasticsearch/_sync/client/graph.py +1 -1
  54. elasticsearch/_sync/client/ilm.py +11 -11
  55. elasticsearch/_sync/client/indices.py +131 -82
  56. elasticsearch/_sync/client/inference.py +411 -112
  57. elasticsearch/_sync/client/ingest.py +9 -16
  58. elasticsearch/_sync/client/license.py +10 -10
  59. elasticsearch/_sync/client/logstash.py +3 -3
  60. elasticsearch/_sync/client/migration.py +3 -3
  61. elasticsearch/_sync/client/ml.py +76 -88
  62. elasticsearch/_sync/client/nodes.py +9 -8
  63. elasticsearch/_sync/client/query_rules.py +8 -8
  64. elasticsearch/_sync/client/rollup.py +8 -8
  65. elasticsearch/_sync/client/search_application.py +10 -10
  66. elasticsearch/_sync/client/searchable_snapshots.py +4 -4
  67. elasticsearch/_sync/client/security.py +72 -80
  68. elasticsearch/_sync/client/shutdown.py +3 -3
  69. elasticsearch/_sync/client/simulate.py +1 -1
  70. elasticsearch/_sync/client/slm.py +9 -9
  71. elasticsearch/_sync/client/snapshot.py +19 -13
  72. elasticsearch/_sync/client/sql.py +6 -6
  73. elasticsearch/_sync/client/ssl.py +1 -1
  74. elasticsearch/_sync/client/synonyms.py +7 -7
  75. elasticsearch/_sync/client/tasks.py +3 -3
  76. elasticsearch/_sync/client/text_structure.py +4 -4
  77. elasticsearch/_sync/client/transform.py +9 -9
  78. elasticsearch/_sync/client/xpack.py +1 -1
  79. elasticsearch/_version.py +1 -1
  80. elasticsearch/compat.py +5 -0
  81. elasticsearch/dsl/__init__.py +2 -1
  82. elasticsearch/dsl/_async/document.py +1 -1
  83. elasticsearch/dsl/_sync/document.py +1 -1
  84. elasticsearch/dsl/aggs.py +2 -3
  85. elasticsearch/dsl/document_base.py +176 -16
  86. elasticsearch/dsl/field.py +223 -38
  87. elasticsearch/dsl/query.py +49 -4
  88. elasticsearch/dsl/types.py +107 -16
  89. elasticsearch/dsl/utils.py +1 -1
  90. elasticsearch/esql/__init__.py +18 -0
  91. elasticsearch/esql/esql.py +1105 -0
  92. elasticsearch/esql/functions.py +1738 -0 (new ES|QL query builder package; see the sketch after this list)
  93. elasticsearch/exceptions.py +2 -0
  94. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/METADATA +1 -3
  95. elasticsearch-8.19.0.dist-info/RECORD +164 -0
  96. elasticsearch-8.18.1.dist-info/RECORD +0 -163
  97. elasticsearch-8.18.1.dist-info/licenses/LICENSE.txt +0 -175
  98. elasticsearch-8.18.1.dist-info/licenses/NOTICE.txt +0 -559
  99. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/WHEEL +0 -0
  100. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/licenses/LICENSE +0 -0
  101. {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/licenses/NOTICE +0 -0
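Items 90-92 add a new `elasticsearch.esql` package (roughly 2,800 new lines). A minimal sketch of the query builder, assuming the technical-preview API documented for 8.19; the index, field names, and host are placeholders:

```python
from elasticsearch import Elasticsearch
from elasticsearch.esql import ESQL

client = Elasticsearch("http://localhost:9200")  # placeholder host

# Compose an ES|QL query programmatically instead of by string concatenation.
query = (
    ESQL.from_("employees")           # FROM employees
    .sort("emp_no")                   # | SORT emp_no
    .keep("first_name", "last_name")  # | KEEP first_name, last_name
    .limit(3)                         # | LIMIT 3
)

# The builder renders to the ES|QL string accepted by the query API.
resp = client.esql.query(query=str(query))
```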
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform completion inference on the service</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
  <p>Delete an inference endpoint</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/delete-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_

  :param inference_id: The inference identifier.
  :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
  <p>Get an inference endpoint</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_

  :param task_type: The task type
  :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
  )

  @_rewrite_parameters(
- body_fields=("input", "query", "task_settings"),
+ body_fields=("input", "input_type", "query", "task_settings"),
  )
  def inference(
  self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
  error_trace: t.Optional[bool] = None,
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
+ input_type: t.Optional[str] = None,
  pretty: t.Optional[bool] = None,
  query: t.Optional[str] = None,
  task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
  </blockquote>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

  :param inference_id: The unique identifier for the inference endpoint.
  :param input: The text on which you want to perform the inference task. It can
  be a single string or an array. > info > Inference endpoints for the `completion`
  task type currently only support a single string as input.
  :param task_type: The type of inference task that the model performs.
+ :param input_type: Specifies the input data type for the text embedding model.
+ The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+ task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+ * `CLUSTERING` Not all services support all values. Unsupported values will
+ trigger a validation exception. Accepted values depend on the configured
+ inference service, refer to the relevant service-specific documentation for
+ more info. > info > The `input_type` parameter specified on the root level
+ of the request body will take precedence over the `input_type` parameter
+ specified in `task_settings`.
  :param query: The query input, which is required only for the `rerank` task.
  It is not required for other tasks.
  :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
  if not __body:
  if input is not None:
  __body["input"] = input
+ if input_type is not None:
+ __body["input_type"] = input_type
  if query is not None:
  __body["query"] = query
  if task_settings is not None:
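A minimal sketch of the new top-level `input_type` in use, assuming a local cluster and an existing `text_embedding` endpoint named `my-embedding-endpoint` (both placeholders):

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder host

# New in 8.19: `input_type` can be sent at the root of the request body for
# `text_embedding` endpoints; it takes precedence over any `input_type` set
# in `task_settings`. Unsupported values trigger a validation exception.
resp = client.inference.inference(
    task_type="text_embedding",
    inference_id="my-embedding-endpoint",
    input=["first passage", "second passage"],
    input_type="SEARCH",
)
```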
@@ -366,26 +378,45 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

- <p>Create an inference endpoint.
- When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>Create an inference endpoint.</p>
  <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
  For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
  However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+ <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+ <ul>
+ <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+ <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Anthropic (<code>completion</code>)</li>
+ <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+ <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+ <li>ELSER (<code>sparse_embedding</code>)</li>
+ <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+ <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+ <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+ <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+ <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+ <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+ <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+ </ul>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_

  :param inference_id: The inference Id
  :param inference_config:
- :param task_type: The task type
+ :param task_type: The task type. Refer to the integration list in the API description
+ for the available task types.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if inference_id in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +447,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  __body = inference_config if inference_config is not None else body
  __headers = {"accept": "application/json", "content-type": "application/json"}
  return self.perform_request( # type: ignore[return-value]
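The same `timeout` query parameter threads through `put` and every service-specific `put_*` helper below. A hedged sketch against the generic `put`, with an OpenAI config as the assumed service settings and all names placeholders:

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder host

# New in 8.19: `timeout` bounds how long to wait for endpoint creation
# (a time-unit string, or the -1 / 0 sentinels).
client.inference.put(
    task_type="text_embedding",
    inference_id="my-openai-embeddings",  # hypothetical endpoint name
    inference_config={
        "service": "openai",
        "service_settings": {
            "api_key": "<api key>",  # never commit real keys
            "model_id": "text-embedding-3-small",
        },
    },
    timeout="30s",
)
```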
@@ -451,6 +484,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -458,14 +492,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an AlibabaCloud AI Search inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +505,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +533,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -547,25 +580,21 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

  <p>Create an Amazon Bedrock inference endpoint.</p>
- <p>Creates an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
  <blockquote>
  <p>info
  You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
  </blockquote>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +605,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +633,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -647,6 +680,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -654,14 +688,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Anthropic inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_

  :param task_type: The task type. The only valid task type for the model to perform
  is `completion`.
@@ -673,6 +702,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +730,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -744,6 +777,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -751,14 +785,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Azure AI studio inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +798,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +826,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -840,6 +873,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -853,14 +887,9 @@ class InferenceClient(NamespacedClient):
  <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
  </ul>
  <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_

  :param task_type: The type of the inference task that the model will perform.
  NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +902,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +930,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -944,6 +977,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -951,14 +985,9 @@ class InferenceClient(NamespacedClient):

  <p>Create a Cohere inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +998,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1024,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1017,6 +1050,221 @@ class InferenceClient(NamespacedClient):
  path_parts=__path_parts,
  )

+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ def put_custom(
+ self,
+ *,
+ task_type: t.Union[
+ str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+ ],
+ custom_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ <p>Create a custom inference endpoint.</p>
+ <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+ The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+ The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+ Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+ The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+ If the definition (key) is not found for a template, an error message is returned.
+ In case of an endpoint definition like the following:</p>
+ <pre><code>PUT _inference/text_embedding/test-text-embedding
+ {
+ &quot;service&quot;: &quot;custom&quot;,
+ &quot;service_settings&quot;: {
+ &quot;secret_parameters&quot;: {
+ &quot;api_key&quot;: &quot;&lt;some api key&gt;&quot;
+ },
+ &quot;url&quot;: &quot;...endpoints.huggingface.cloud/v1/embeddings&quot;,
+ &quot;headers&quot;: {
+ &quot;Authorization&quot;: &quot;Bearer ${api_key}&quot;,
+ &quot;Content-Type&quot;: &quot;application/json&quot;
+ },
+ &quot;request&quot;: &quot;{\\&quot;input\\&quot;: ${input}}&quot;,
+ &quot;response&quot;: {
+ &quot;json_parser&quot;: {
+ &quot;text_embeddings&quot;:&quot;$.data[*].embedding[*]&quot;
+ }
+ }
+ }
+ }
+ </code></pre>
+ <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+ <blockquote>
+ <p>info
+ Templates should not be surrounded by quotes.</p>
+ </blockquote>
+ <p>Pre-defined templates:</p>
+ <ul>
+ <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+ <li><code>${input_type}</code> refers to the input type translation values.</li>
+ <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+ <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+ <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+ </ul>
+
+
+ `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param custom_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `custom`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `custom` service.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type you specified.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if custom_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "custom_inference_id": _quote(custom_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_custom",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters(
+ body_fields=("service", "service_settings", "chunking_settings"),
+ )
+ def put_deepseek(
+ self,
+ *,
+ task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+ deepseek_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ <p>Create a DeepSeek inference endpoint.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param deepseek_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `deepseek`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `deepseek` service.
+ :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if deepseek_inference_id in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "deepseek_inference_id": _quote(deepseek_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_deepseek",
+ path_parts=__path_parts,
+ )
+
  @_rewrite_parameters(
  body_fields=(
  "service",
@@ -1040,6 +1288,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1062,7 +1311,7 @@ class InferenceClient(NamespacedClient):
  Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param elasticsearch_inference_id: The unique identifier of the inference endpoint.
@@ -1074,6 +1323,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1100,6 +1351,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1139,6 +1392,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1162,7 +1416,7 @@ class InferenceClient(NamespacedClient):
  Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elser.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param elser_inference_id: The unique identifier of the inference endpoint.
@@ -1171,6 +1425,8 @@ class InferenceClient(NamespacedClient):
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `elser` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1195,6 +1451,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1232,6 +1490,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1239,14 +1498,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an Google AI Studio inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-ai-studio.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param googleaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -1255,6 +1509,8 @@ class InferenceClient(NamespacedClient):
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `googleaistudio` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1281,6 +1537,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1314,7 +1572,9 @@ class InferenceClient(NamespacedClient):
  def put_googlevertexai(
  self,
  *,
- task_type: t.Union[str, t.Literal["rerank", "text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+ ],
  googlevertexai_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
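The `put_googlevertexai` task-type literal widens from two to four values in the hunk above. A hedged sketch of a `completion` endpoint; the `service_settings` keys follow the documented `googlevertexai` service, and every identifier here is a placeholder:

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder host

client.inference.put_googlevertexai(
    task_type="completion",  # "completion" and "chat_completion" are new in 8.19
    googlevertexai_inference_id="my-vertex-completion",  # hypothetical name
    service="googlevertexai",
    service_settings={
        "service_account_json": "<service account json>",
        "model_id": "<model id>",
        "location": "<gcp location>",
        "project_id": "<gcp project>",
    },
    timeout="30s",
)
```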
@@ -1324,6 +1584,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1331,14 +1592,9 @@ class InferenceClient(NamespacedClient):

  <p>Create a Google Vertex AI inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-google-vertex-ai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1349,6 +1605,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1633,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1400,12 +1660,19 @@ class InferenceClient(NamespacedClient):
  )

  @_rewrite_parameters(
- body_fields=("service", "service_settings", "chunking_settings"),
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
  )
  def put_hugging_face(
  self,
  *,
- task_type: t.Union[str, t.Literal["text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+ ],
  huggingface_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1414,17 +1681,22 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

  <p>Create a Hugging Face inference endpoint.</p>
- <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.</p>
- <p>You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.
- Select the model you want to use on the new endpoint creation page (for example <code>intfloat/e5-small-v2</code>), then select the sentence embeddings task under the advanced configuration section.
- Create the endpoint and copy the URL after the endpoint initialization has been finished.</p>
- <p>The following models are recommended for the Hugging Face service:</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+ Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+ <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+ Select a model that supports the task you intend to use.</p>
+ <p>For Elastic's <code>text_embedding</code> task:
+ The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+ After the endpoint has initialized, copy the generated endpoint URL.
+ Recommended models for <code>text_embedding</code> task:</p>
  <ul>
  <li><code>all-MiniLM-L6-v2</code></li>
  <li><code>all-MiniLM-L12-v2</code></li>
@@ -1434,14 +1706,27 @@ class InferenceClient(NamespacedClient):
  <li><code>multilingual-e5-base</code></li>
  <li><code>multilingual-e5-small</code></li>
  </ul>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+ The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+ After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+ Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+ <ul>
+ <li><code>Mistral-7B-Instruct-v0.2</code></li>
+ <li><code>QwQ-32B</code></li>
+ <li><code>Phi-3-mini-128k-instruct</code></li>
+ </ul>
+ <p>For Elastic's <code>rerank</code> task:
+ The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+ HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+ After the endpoint is initialized, copy the full endpoint URL for use.
+ Tested models for <code>rerank</code> task:</p>
+ <ul>
+ <li><code>bge-reranker-base</code></li>
+ <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+ </ul>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-hugging-face.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param huggingface_inference_id: The unique identifier of the inference endpoint.
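Hugging Face support widens the same way: `chat_completion`, `completion`, and `rerank` join `text_embedding`, and `task_settings`/`timeout` are now accepted. A hedged sketch of a `chat_completion` endpoint per the docstring's note that the URL must include the `/v1/chat/completions` path; token and URL are placeholders:

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder host

client.inference.put_hugging_face(
    task_type="chat_completion",  # new in 8.19 alongside completion and rerank
    huggingface_inference_id="my-hf-chat",  # hypothetical name
    service="hugging_face",
    service_settings={
        "api_key": "<hugging face token>",
        "url": "https://<endpoint>.endpoints.huggingface.cloud/v1/chat/completions",
    },
    timeout="30s",
)
```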
@@ -1450,6 +1735,10 @@ class InferenceClient(NamespacedClient):
1450
1735
  :param service_settings: Settings used to install the inference model. These
1451
1736
  settings are specific to the `hugging_face` service.
1452
1737
  :param chunking_settings: The chunking configuration object.
1738
+ :param task_settings: Settings to configure the inference task. These settings
1739
+ are specific to the task type you specified.
1740
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
1741
+ to be created.
1453
1742
  """
1454
1743
  if task_type in SKIP_IN_PATH:
1455
1744
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1765,8 @@ class InferenceClient(NamespacedClient):
1476
1765
  __query["human"] = human
1477
1766
  if pretty is not None:
1478
1767
  __query["pretty"] = pretty
1768
+ if timeout is not None:
1769
+ __query["timeout"] = timeout
1479
1770
  if not __body:
1480
1771
  if service is not None:
1481
1772
  __body["service"] = service
@@ -1483,6 +1774,8 @@ class InferenceClient(NamespacedClient):
1483
1774
  __body["service_settings"] = service_settings
1484
1775
  if chunking_settings is not None:
1485
1776
  __body["chunking_settings"] = chunking_settings
1777
+ if task_settings is not None:
1778
+ __body["task_settings"] = task_settings
1486
1779
  if not __body:
1487
1780
  __body = None # type: ignore[assignment]
1488
1781
  __headers = {"accept": "application/json"}
@@ -1519,6 +1812,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1528,14 +1822,9 @@ class InferenceClient(NamespacedClient):
  <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
  <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
  To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-jinaai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1835,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1861,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1600,7 +1893,9 @@ class InferenceClient(NamespacedClient):
  def put_mistral(
  self,
  *,
- task_type: t.Union[str, t.Literal["text_embedding"]],
+ task_type: t.Union[
+ str, t.Literal["chat_completion", "completion", "text_embedding"]
+ ],
  mistral_inference_id: str,
  service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
  service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +1904,27 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
  .. raw:: html

  <p>Create a Mistral inference endpoint.</p>
- <p>Creates an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+ <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/{brnach}/infer-service-mistral.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_

- :param task_type: The task type. The only valid task type for the model to perform
- is `text_embedding`.
+ :param task_type: The type of the inference task that the model will perform.
  :param mistral_inference_id: The unique identifier of the inference endpoint.
  :param service: The type of service supported for the specified task type. In
  this case, `mistral`.
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `mistral` service.
  :param chunking_settings: The chunking configuration object.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +1949,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1702,6 +1996,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1709,14 +2004,9 @@ class InferenceClient(NamespacedClient):

  <p>Create an OpenAI inference endpoint.</p>
  <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_

  :param task_type: The type of the inference task that the model will perform.
  NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2019,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2045,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1798,6 +2092,7 @@ class InferenceClient(NamespacedClient):
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
  task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1808,7 +2103,7 @@ class InferenceClient(NamespacedClient):
  <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-voyageai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_

  :param task_type: The type of the inference task that the model will perform.
  :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2114,8 @@ class InferenceClient(NamespacedClient):
  :param chunking_settings: The chunking configuration object.
  :param task_settings: Settings to configure the inference task. These settings
  are specific to the task type you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2140,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1881,6 +2180,7 @@ class InferenceClient(NamespacedClient):
  filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
  human: t.Optional[bool] = None,
  pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
  body: t.Optional[t.Dict[str, t.Any]] = None,
  ) -> ObjectApiResponse[t.Any]:
  """
@@ -1890,14 +2190,9 @@ class InferenceClient(NamespacedClient):
  <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
  You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
  You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
- <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
- After creating the endpoint, wait for the model deployment to complete before using it.
- To verify the deployment status, use the get trained model statistics API.
- Look for <code>&quot;state&quot;: &quot;fully_allocated&quot;</code> in the response and ensure that the <code>&quot;allocation_count&quot;</code> matches the <code>&quot;target_allocation_count&quot;</code>.
- Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_

  :param task_type: The task type. The only valid task type for the model to perform
  is `text_embedding`.
@@ -1906,6 +2201,8 @@ class InferenceClient(NamespacedClient):
  this case, `watsonxai`.
  :param service_settings: Settings used to install the inference model. These
  settings are specific to the `watsonxai` service.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
  """
  if task_type in SKIP_IN_PATH:
  raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2227,8 @@ class InferenceClient(NamespacedClient):
  __query["human"] = human
  if pretty is not None:
  __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
  if not __body:
  if service is not None:
  __body["service"] = service
@@ -1970,10 +2269,10 @@ class InferenceClient(NamespacedClient):
  """
  .. raw:: html

- <p>Perform rereanking inference on the service</p>
+ <p>Perform reranking inference on the service</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

  :param inference_id: The unique identifier for the inference endpoint.
  :param input: The text on which you want to perform the inference task. It can
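
A hedged sketch of calling this method against the hypothetical rerank endpoint created earlier; the query and documents are illustrative, and the response key is assumed.

```python
# Sketch: rerank a few documents against an existing rerank endpoint.
resp = client.inference.rerank(
    inference_id="hf-rerank-demo",  # hypothetical endpoint from the earlier sketch
    query="What is Elasticsearch?",
    input=[
        "Elasticsearch is a distributed search and analytics engine.",
        "Lucene is a Java search library.",
    ],
)
print(resp["rerank"])  # assumed shape: one relevance-scored entry per input
```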
@@ -2049,7 +2348,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform sparse embedding inference on the service</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
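
A sketch of sparse embedding inference; the endpoint name is a placeholder for an existing `sparse_embedding` endpoint (for example, one backed by ELSER), and the response key is assumed.

```python
# Sketch: sparse embedding against a hypothetical existing endpoint.
resp = client.inference.sparse_embedding(
    inference_id="my-elser-endpoint",  # hypothetical endpoint name
    input="The quick brown fox jumps over the lazy dog",
)
print(resp["sparse_embedding"])  # assumed key holding token/weight maps
```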
@@ -2117,7 +2416,7 @@ class InferenceClient(NamespacedClient):
  <p>Perform text embedding inference on the service</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/post-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

  :param inference_id: The inference Id
  :param input: Inference input. Either a string or an array of strings.
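
A matching sketch for dense text embedding, reusing the hypothetical JinaAI endpoint from earlier; the response key is assumed.

```python
# Sketch: dense text embedding against a hypothetical existing endpoint.
resp = client.inference.text_embedding(
    inference_id="jinaai-embeddings-demo",  # hypothetical endpoint name
    input=["first passage to embed", "second passage to embed"],
)
print(resp["text_embedding"])  # assumed key holding one vector per input
```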
@@ -2199,7 +2498,7 @@ class InferenceClient(NamespacedClient):
  However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


- `<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/update-inference-api.html>`_
+ `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_

  :param inference_id: The unique identifier of the inference endpoint.
  :param inference_config:
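
Finally, a sketch of updating an endpoint in place via `inference_config`; the endpoint name is the earlier hypothetical one, and rotating the API key is just an example of a partial settings update.

```python
# Sketch: update an existing endpoint's secret settings in place.
resp = client.inference.update(
    inference_id="openai-embeddings-demo",  # hypothetical endpoint name
    inference_config={
        "service_settings": {
            "api_key": "<rotated-openai-api-key>",  # placeholder
        },
    },
)
```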