elasticsearch 8.17.1__py3-none-any.whl → 8.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/__init__.py +2 -2
- elasticsearch/_async/client/__init__.py +2146 -859
- elasticsearch/_async/client/_base.py +0 -1
- elasticsearch/_async/client/async_search.py +44 -29
- elasticsearch/_async/client/autoscaling.py +32 -26
- elasticsearch/_async/client/cat.py +244 -169
- elasticsearch/_async/client/ccr.py +268 -128
- elasticsearch/_async/client/cluster.py +191 -164
- elasticsearch/_async/client/connector.py +212 -116
- elasticsearch/_async/client/dangling_indices.py +22 -16
- elasticsearch/_async/client/enrich.py +51 -11
- elasticsearch/_async/client/eql.py +54 -13
- elasticsearch/_async/client/esql.py +352 -4
- elasticsearch/_async/client/features.py +37 -27
- elasticsearch/_async/client/fleet.py +22 -10
- elasticsearch/_async/client/graph.py +10 -9
- elasticsearch/_async/client/ilm.py +108 -77
- elasticsearch/_async/client/indices.py +1112 -677
- elasticsearch/_async/client/inference.py +1875 -133
- elasticsearch/_async/client/ingest.py +83 -50
- elasticsearch/_async/client/license.py +90 -38
- elasticsearch/_async/client/logstash.py +20 -9
- elasticsearch/_async/client/migration.py +26 -17
- elasticsearch/_async/client/ml.py +642 -365
- elasticsearch/_async/client/monitoring.py +6 -3
- elasticsearch/_async/client/nodes.py +51 -53
- elasticsearch/_async/client/query_rules.py +59 -33
- elasticsearch/_async/client/rollup.py +124 -86
- elasticsearch/_async/client/search_application.py +60 -32
- elasticsearch/_async/client/searchable_snapshots.py +25 -12
- elasticsearch/_async/client/security.py +896 -558
- elasticsearch/_async/client/shutdown.py +34 -36
- elasticsearch/_async/client/simulate.py +22 -28
- elasticsearch/_async/client/slm.py +65 -40
- elasticsearch/_async/client/snapshot.py +190 -213
- elasticsearch/_async/client/sql.py +43 -22
- elasticsearch/_async/client/ssl.py +17 -18
- elasticsearch/_async/client/synonyms.py +58 -37
- elasticsearch/_async/client/tasks.py +77 -48
- elasticsearch/_async/client/text_structure.py +65 -56
- elasticsearch/_async/client/transform.py +124 -93
- elasticsearch/_async/client/watcher.py +116 -72
- elasticsearch/_async/client/xpack.py +18 -9
- elasticsearch/_async/helpers.py +1 -2
- elasticsearch/_sync/client/__init__.py +2146 -859
- elasticsearch/_sync/client/_base.py +0 -1
- elasticsearch/_sync/client/async_search.py +44 -29
- elasticsearch/_sync/client/autoscaling.py +32 -26
- elasticsearch/_sync/client/cat.py +244 -169
- elasticsearch/_sync/client/ccr.py +268 -128
- elasticsearch/_sync/client/cluster.py +191 -164
- elasticsearch/_sync/client/connector.py +212 -116
- elasticsearch/_sync/client/dangling_indices.py +22 -16
- elasticsearch/_sync/client/enrich.py +51 -11
- elasticsearch/_sync/client/eql.py +54 -13
- elasticsearch/_sync/client/esql.py +352 -4
- elasticsearch/_sync/client/features.py +37 -27
- elasticsearch/_sync/client/fleet.py +22 -10
- elasticsearch/_sync/client/graph.py +10 -9
- elasticsearch/_sync/client/ilm.py +108 -77
- elasticsearch/_sync/client/indices.py +1112 -677
- elasticsearch/_sync/client/inference.py +1875 -133
- elasticsearch/_sync/client/ingest.py +83 -50
- elasticsearch/_sync/client/license.py +90 -38
- elasticsearch/_sync/client/logstash.py +20 -9
- elasticsearch/_sync/client/migration.py +26 -17
- elasticsearch/_sync/client/ml.py +642 -365
- elasticsearch/_sync/client/monitoring.py +6 -3
- elasticsearch/_sync/client/nodes.py +51 -53
- elasticsearch/_sync/client/query_rules.py +59 -33
- elasticsearch/_sync/client/rollup.py +124 -86
- elasticsearch/_sync/client/search_application.py +60 -32
- elasticsearch/_sync/client/searchable_snapshots.py +25 -12
- elasticsearch/_sync/client/security.py +896 -558
- elasticsearch/_sync/client/shutdown.py +34 -36
- elasticsearch/_sync/client/simulate.py +22 -28
- elasticsearch/_sync/client/slm.py +65 -40
- elasticsearch/_sync/client/snapshot.py +190 -213
- elasticsearch/_sync/client/sql.py +43 -22
- elasticsearch/_sync/client/ssl.py +17 -18
- elasticsearch/_sync/client/synonyms.py +58 -37
- elasticsearch/_sync/client/tasks.py +77 -48
- elasticsearch/_sync/client/text_structure.py +65 -56
- elasticsearch/_sync/client/transform.py +124 -93
- elasticsearch/_sync/client/utils.py +1 -4
- elasticsearch/_sync/client/watcher.py +116 -72
- elasticsearch/_sync/client/xpack.py +18 -9
- elasticsearch/_version.py +1 -1
- elasticsearch/client.py +2 -0
- elasticsearch/dsl/__init__.py +203 -0
- elasticsearch/dsl/_async/__init__.py +16 -0
- elasticsearch/dsl/_async/document.py +522 -0
- elasticsearch/dsl/_async/faceted_search.py +50 -0
- elasticsearch/dsl/_async/index.py +639 -0
- elasticsearch/dsl/_async/mapping.py +49 -0
- elasticsearch/dsl/_async/search.py +233 -0
- elasticsearch/dsl/_async/update_by_query.py +47 -0
- elasticsearch/dsl/_sync/__init__.py +16 -0
- elasticsearch/dsl/_sync/document.py +514 -0
- elasticsearch/dsl/_sync/faceted_search.py +50 -0
- elasticsearch/dsl/_sync/index.py +597 -0
- elasticsearch/dsl/_sync/mapping.py +49 -0
- elasticsearch/dsl/_sync/search.py +226 -0
- elasticsearch/dsl/_sync/update_by_query.py +45 -0
- elasticsearch/dsl/aggs.py +3730 -0
- elasticsearch/dsl/analysis.py +341 -0
- elasticsearch/dsl/async_connections.py +37 -0
- elasticsearch/dsl/connections.py +142 -0
- elasticsearch/dsl/document.py +20 -0
- elasticsearch/dsl/document_base.py +444 -0
- elasticsearch/dsl/exceptions.py +32 -0
- elasticsearch/dsl/faceted_search.py +28 -0
- elasticsearch/dsl/faceted_search_base.py +489 -0
- elasticsearch/dsl/field.py +4254 -0
- elasticsearch/dsl/function.py +180 -0
- elasticsearch/dsl/index.py +23 -0
- elasticsearch/dsl/index_base.py +178 -0
- elasticsearch/dsl/mapping.py +19 -0
- elasticsearch/dsl/mapping_base.py +219 -0
- elasticsearch/dsl/query.py +2816 -0
- elasticsearch/dsl/response/__init__.py +388 -0
- elasticsearch/dsl/response/aggs.py +100 -0
- elasticsearch/dsl/response/hit.py +53 -0
- elasticsearch/dsl/search.py +20 -0
- elasticsearch/dsl/search_base.py +1040 -0
- elasticsearch/dsl/serializer.py +34 -0
- elasticsearch/dsl/types.py +6471 -0
- elasticsearch/dsl/update_by_query.py +19 -0
- elasticsearch/dsl/update_by_query_base.py +149 -0
- elasticsearch/dsl/utils.py +687 -0
- elasticsearch/dsl/wrappers.py +119 -0
- elasticsearch/helpers/actions.py +1 -1
- elasticsearch/helpers/vectorstore/_sync/vectorstore.py +4 -1
- {elasticsearch-8.17.1.dist-info → elasticsearch-8.18.0.dist-info}/METADATA +12 -2
- elasticsearch-8.18.0.dist-info/RECORD +161 -0
- elasticsearch-8.17.1.dist-info/RECORD +0 -119
- {elasticsearch-8.17.1.dist-info → elasticsearch-8.18.0.dist-info}/WHEEL +0 -0
- {elasticsearch-8.17.1.dist-info → elasticsearch-8.18.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.17.1.dist-info → elasticsearch-8.18.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -628,6 +628,7 @@ class AsyncElasticsearch(BaseClient):
|
|
|
628
628
|
error_trace: t.Optional[bool] = None,
|
|
629
629
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
630
630
|
human: t.Optional[bool] = None,
|
|
631
|
+
include_source_on_error: t.Optional[bool] = None,
|
|
631
632
|
list_executed_pipelines: t.Optional[bool] = None,
|
|
632
633
|
pipeline: t.Optional[str] = None,
|
|
633
634
|
pretty: t.Optional[bool] = None,
|
|
@@ -646,89 +647,97 @@ class AsyncElasticsearch(BaseClient):
|
|
|
646
647
|
] = None,
|
|
647
648
|
) -> ObjectApiResponse[t.Any]:
|
|
648
649
|
"""
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
650
|
+
.. raw:: html
|
|
651
|
+
|
|
652
|
+
<p>Bulk index or delete documents.
|
|
653
|
+
Perform multiple <code>index</code>, <code>create</code>, <code>delete</code>, and <code>update</code> actions in a single request.
|
|
654
|
+
This reduces overhead and can greatly increase indexing speed.</p>
|
|
655
|
+
<p>If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:</p>
|
|
656
|
+
<ul>
|
|
657
|
+
<li>To use the <code>create</code> action, you must have the <code>create_doc</code>, <code>create</code>, <code>index</code>, or <code>write</code> index privilege. Data streams support only the <code>create</code> action.</li>
|
|
658
|
+
<li>To use the <code>index</code> action, you must have the <code>create</code>, <code>index</code>, or <code>write</code> index privilege.</li>
|
|
659
|
+
<li>To use the <code>delete</code> action, you must have the <code>delete</code> or <code>write</code> index privilege.</li>
|
|
660
|
+
<li>To use the <code>update</code> action, you must have the <code>index</code> or <code>write</code> index privilege.</li>
|
|
661
|
+
<li>To automatically create a data stream or index with a bulk API request, you must have the <code>auto_configure</code>, <code>create_index</code>, or <code>manage</code> index privilege.</li>
|
|
662
|
+
<li>To make the result of a bulk operation visible to search using the <code>refresh</code> parameter, you must have the <code>maintenance</code> or <code>manage</code> index privilege.</li>
|
|
663
|
+
</ul>
|
|
664
|
+
<p>Automatic data stream creation requires a matching index template with data stream enabled.</p>
|
|
665
|
+
<p>The actions are specified in the request body using a newline delimited JSON (NDJSON) structure:</p>
|
|
666
|
+
<pre><code>action_and_meta_data\\n
|
|
667
|
+
optional_source\\n
|
|
668
|
+
action_and_meta_data\\n
|
|
669
|
+
optional_source\\n
|
|
670
|
+
....
|
|
671
|
+
action_and_meta_data\\n
|
|
672
|
+
optional_source\\n
|
|
673
|
+
</code></pre>
|
|
674
|
+
<p>The <code>index</code> and <code>create</code> actions expect a source on the next line and have the same semantics as the <code>op_type</code> parameter in the standard index API.
|
|
675
|
+
A <code>create</code> action fails if a document with the same ID already exists in the target.
|
|
676
|
+
An <code>index</code> action adds or replaces a document as necessary.</p>
|
|
677
|
+
<p>NOTE: Data streams support only the <code>create</code> action.
|
|
678
|
+
To update or delete a document in a data stream, you must target the backing index containing the document.</p>
|
|
679
|
+
<p>An <code>update</code> action expects that the partial doc, upsert, and script and its options are specified on the next line.</p>
|
|
680
|
+
<p>A <code>delete</code> action does not expect a source on the next line and has the same semantics as the standard delete API.</p>
|
|
681
|
+
<p>NOTE: The final line of data must end with a newline character (<code>\\n</code>).
|
|
682
|
+
Each newline character may be preceded by a carriage return (<code>\\r</code>).
|
|
683
|
+
When sending NDJSON data to the <code>_bulk</code> endpoint, use a <code>Content-Type</code> header of <code>application/json</code> or <code>application/x-ndjson</code>.
|
|
684
|
+
Because this format uses literal newline characters (<code>\\n</code>) as delimiters, make sure that the JSON actions and sources are not pretty printed.</p>
|
|
685
|
+
<p>If you provide a target in the request path, it is used for any actions that don't explicitly specify an <code>_index</code> argument.</p>
|
|
686
|
+
<p>A note on the format: the idea here is to make processing as fast as possible.
|
|
687
|
+
As some of the actions are redirected to other shards on other nodes, only <code>action_meta_data</code> is parsed on the receiving node side.</p>
|
|
688
|
+
<p>Client libraries using this protocol should try and strive to do something similar on the client side, and reduce buffering as much as possible.</p>
|
|
689
|
+
<p>There is no "correct" number of actions to perform in a single bulk request.
|
|
690
|
+
Experiment with different settings to find the optimal size for your particular workload.
|
|
691
|
+
Note that Elasticsearch limits the maximum size of a HTTP request to 100mb by default so clients must ensure that no request exceeds this size.
|
|
692
|
+
It is not possible to index a single document that exceeds the size limit, so you must pre-process any such documents into smaller pieces before sending them to Elasticsearch.
|
|
693
|
+
For instance, split documents into pages or chapters before indexing them, or store raw binary data in a system outside Elasticsearch and replace the raw data with a link to the external system in the documents that you send to Elasticsearch.</p>
|
|
694
|
+
<p><strong>Client support for bulk requests</strong></p>
|
|
695
|
+
<p>Some of the officially supported clients provide helpers to assist with bulk requests and reindexing:</p>
|
|
696
|
+
<ul>
|
|
697
|
+
<li>Go: Check out <code>esutil.BulkIndexer</code></li>
|
|
698
|
+
<li>Perl: Check out <code>Search::Elasticsearch::Client::5_0::Bulk</code> and <code>Search::Elasticsearch::Client::5_0::Scroll</code></li>
|
|
699
|
+
<li>Python: Check out <code>elasticsearch.helpers.*</code></li>
|
|
700
|
+
<li>JavaScript: Check out <code>client.helpers.*</code></li>
|
|
701
|
+
<li>.NET: Check out <code>BulkAllObservable</code></li>
|
|
702
|
+
<li>PHP: Check out bulk indexing.</li>
|
|
703
|
+
</ul>
|
|
704
|
+
<p><strong>Submitting bulk requests with cURL</strong></p>
|
|
705
|
+
<p>If you're providing text file input to <code>curl</code>, you must use the <code>--data-binary</code> flag instead of plain <code>-d</code>.
|
|
706
|
+
The latter doesn't preserve newlines. For example:</p>
|
|
707
|
+
<pre><code>$ cat requests
|
|
708
|
+
{ "index" : { "_index" : "test", "_id" : "1" } }
|
|
709
|
+
{ "field1" : "value1" }
|
|
710
|
+
$ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo
|
|
711
|
+
{"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]}
|
|
712
|
+
</code></pre>
|
|
713
|
+
<p><strong>Optimistic concurrency control</strong></p>
|
|
714
|
+
<p>Each <code>index</code> and <code>delete</code> action within a bulk API call may include the <code>if_seq_no</code> and <code>if_primary_term</code> parameters in their respective action and meta data lines.
|
|
715
|
+
The <code>if_seq_no</code> and <code>if_primary_term</code> parameters control how operations are run, based on the last modification to existing documents. See Optimistic concurrency control for more details.</p>
|
|
716
|
+
<p><strong>Versioning</strong></p>
|
|
717
|
+
<p>Each bulk item can include the version value using the <code>version</code> field.
|
|
718
|
+
It automatically follows the behavior of the index or delete operation based on the <code>_version</code> mapping.
|
|
719
|
+
It also supports the <code>version_type</code>.</p>
|
|
720
|
+
<p><strong>Routing</strong></p>
|
|
721
|
+
<p>Each bulk item can include the routing value using the <code>routing</code> field.
|
|
722
|
+
It automatically follows the behavior of the index or delete operation based on the <code>_routing</code> mapping.</p>
|
|
723
|
+
<p>NOTE: Data streams do not support custom routing unless they were created with the <code>allow_custom_routing</code> setting enabled in the template.</p>
|
|
724
|
+
<p><strong>Wait for active shards</strong></p>
|
|
725
|
+
<p>When making bulk calls, you can set the <code>wait_for_active_shards</code> parameter to require a minimum number of shard copies to be active before starting to process the bulk request.</p>
|
|
726
|
+
<p><strong>Refresh</strong></p>
|
|
727
|
+
<p>Control when the changes made by this request are visible to search.</p>
|
|
728
|
+
<p>NOTE: Only the shards that receive the bulk request will be affected by refresh.
|
|
729
|
+
Imagine a <code>_bulk?refresh=wait_for</code> request with three documents in it that happen to be routed to different shards in an index with five shards.
|
|
730
|
+
The request will only wait for those three shards to refresh.
|
|
731
|
+
The other two shards that make up the index do not participate in the <code>_bulk</code> request at all.</p>
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-bulk.html>`_
|
|
728
735
|
|
|
729
736
|
:param operations:
|
|
730
737
|
:param index: The name of the data stream, index, or index alias to perform bulk
|
|
731
738
|
actions on.
|
|
739
|
+
:param include_source_on_error: Whether to include the document source
|
|
740
|
+
in the error message in case of parsing errors.
|
|
732
741
|
:param list_executed_pipelines: If `true`, the response will include the ingest
|
|
733
742
|
pipelines that were run for each index or create.
|
|
734
743
|
:param pipeline: The pipeline identifier to use to preprocess incoming documents.
|
|
@@ -786,6 +795,8 @@ class AsyncElasticsearch(BaseClient):
|
|
|
786
795
|
__query["filter_path"] = filter_path
|
|
787
796
|
if human is not None:
|
|
788
797
|
__query["human"] = human
|
|
798
|
+
if include_source_on_error is not None:
|
|
799
|
+
__query["include_source_on_error"] = include_source_on_error
|
|
789
800
|
if list_executed_pipelines is not None:
|
|
790
801
|
__query["list_executed_pipelines"] = list_executed_pipelines
|
|
791
802
|
if pipeline is not None:
|
|
@@ -839,10 +850,13 @@ class AsyncElasticsearch(BaseClient):
|
|
|
839
850
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
840
851
|
) -> ObjectApiResponse[t.Any]:
|
|
841
852
|
"""
|
|
842
|
-
|
|
843
|
-
search.
|
|
853
|
+
.. raw:: html
|
|
844
854
|
|
|
845
|
-
|
|
855
|
+
<p>Clear a scrolling search.
|
|
856
|
+
Clear the search context and results for a scrolling search.</p>
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/clear-scroll-api.html>`_
|
|
846
860
|
|
|
847
861
|
:param scroll_id: The scroll IDs to clear. To clear all scroll IDs, use `_all`.
|
|
848
862
|
"""
|
|
@@ -890,13 +904,16 @@ class AsyncElasticsearch(BaseClient):
|
|
|
890
904
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
891
905
|
) -> ObjectApiResponse[t.Any]:
|
|
892
906
|
"""
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
907
|
+
.. raw:: html
|
|
908
|
+
|
|
909
|
+
<p>Close a point in time.
|
|
910
|
+
A point in time must be opened explicitly before being used in search requests.
|
|
911
|
+
The <code>keep_alive</code> parameter tells Elasticsearch how long it should persist.
|
|
912
|
+
A point in time is automatically closed when the <code>keep_alive</code> period has elapsed.
|
|
913
|
+
However, keeping points in time has a cost; close them as soon as they are no longer required for search requests.</p>
|
|
914
|
+
|
|
898
915
|
|
|
899
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
916
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/point-in-time-api.html>`_
|
|
900
917
|
|
|
901
918
|
:param id: The ID of the point-in-time.
|
|
902
919
|
"""
|
|
@@ -968,16 +985,19 @@ class AsyncElasticsearch(BaseClient):
|
|
|
968
985
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
969
986
|
) -> ObjectApiResponse[t.Any]:
|
|
970
987
|
"""
|
|
971
|
-
|
|
972
|
-
can either be provided using a simple query string as a parameter or using the
|
|
973
|
-
Query DSL defined within the request body. The latter must be nested in a `query`
|
|
974
|
-
key, which is the same as the search API. The count API supports multi-target
|
|
975
|
-
syntax. You can run a single count API search across multiple data streams and
|
|
976
|
-
indices. The operation is broadcast across all shards. For each shard ID group,
|
|
977
|
-
a replica is chosen and the search is run against it. This means that replicas
|
|
978
|
-
increase the scalability of the count.
|
|
988
|
+
.. raw:: html
|
|
979
989
|
|
|
980
|
-
|
|
990
|
+
<p>Count search results.
|
|
991
|
+
Get the number of documents matching a query.</p>
|
|
992
|
+
<p>The query can be provided either by using a simple query string as a parameter, or by defining Query DSL within the request body.
|
|
993
|
+
The query is optional. When no query is provided, the API uses <code>match_all</code> to count all the documents.</p>
|
|
994
|
+
<p>The count API supports multi-target syntax. You can run a single count API search across multiple data streams and indices.</p>
|
|
995
|
+
<p>The operation is broadcast across all shards.
|
|
996
|
+
For each shard ID group, a replica is chosen and the search is run against it.
|
|
997
|
+
This means that replicas increase the scalability of the count.</p>
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-count.html>`_
|
|
981
1001
|
|
|
982
1002
|
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
983
1003
|
search. It supports wildcards (`*`). To search all data streams and indices,
|
|
@@ -1012,10 +1032,10 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1012
1032
|
in the result.
|
|
1013
1033
|
:param preference: The node or shard the operation should be performed on. By
|
|
1014
1034
|
default, it is random.
|
|
1015
|
-
:param q: The query in Lucene query string syntax.
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1035
|
+
:param q: The query in Lucene query string syntax. This parameter cannot be used
|
|
1036
|
+
with a request body.
|
|
1037
|
+
:param query: Defines the search query using Query DSL. A request body query
|
|
1038
|
+
cannot be used with the `q` query string parameter.
|
|
1019
1039
|
:param routing: A custom value used to route operations to a specific shard.
|
|
1020
1040
|
:param terminate_after: The maximum number of documents to collect for each shard.
|
|
1021
1041
|
If a query reaches this limit, Elasticsearch terminates the query early.
|
|
@@ -1101,11 +1121,17 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1101
1121
|
error_trace: t.Optional[bool] = None,
|
|
1102
1122
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1103
1123
|
human: t.Optional[bool] = None,
|
|
1124
|
+
if_primary_term: t.Optional[int] = None,
|
|
1125
|
+
if_seq_no: t.Optional[int] = None,
|
|
1126
|
+
include_source_on_error: t.Optional[bool] = None,
|
|
1127
|
+
op_type: t.Optional[t.Union[str, t.Literal["create", "index"]]] = None,
|
|
1104
1128
|
pipeline: t.Optional[str] = None,
|
|
1105
1129
|
pretty: t.Optional[bool] = None,
|
|
1106
1130
|
refresh: t.Optional[
|
|
1107
1131
|
t.Union[bool, str, t.Literal["false", "true", "wait_for"]]
|
|
1108
1132
|
] = None,
|
|
1133
|
+
require_alias: t.Optional[bool] = None,
|
|
1134
|
+
require_data_stream: t.Optional[bool] = None,
|
|
1109
1135
|
routing: t.Optional[str] = None,
|
|
1110
1136
|
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1111
1137
|
version: t.Optional[int] = None,
|
|
@@ -1117,38 +1143,115 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1117
1143
|
] = None,
|
|
1118
1144
|
) -> ObjectApiResponse[t.Any]:
|
|
1119
1145
|
"""
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1146
|
+
.. raw:: html
|
|
1147
|
+
|
|
1148
|
+
<p>Create a new document in the index.</p>
|
|
1149
|
+
<p>You can index a new JSON document with the <code>/<target>/_doc/</code> or <code>/<target>/_create/<_id></code> APIs.
|
|
1150
|
+
Using <code>_create</code> guarantees that the document is indexed only if it does not already exist.
|
|
1151
|
+
It returns a 409 response when a document with the same ID already exists in the index.
|
|
1152
|
+
To update an existing document, you must use the <code>/<target>/_doc/</code> API.</p>
|
|
1153
|
+
<p>If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:</p>
|
|
1154
|
+
<ul>
|
|
1155
|
+
<li>To add a document using the <code>PUT /<target>/_create/<_id></code> or <code>POST /<target>/_create/<_id></code> request formats, you must have the <code>create_doc</code>, <code>create</code>, <code>index</code>, or <code>write</code> index privilege.</li>
|
|
1156
|
+
<li>To automatically create a data stream or index with this API request, you must have the <code>auto_configure</code>, <code>create_index</code>, or <code>manage</code> index privilege.</li>
|
|
1157
|
+
</ul>
|
|
1158
|
+
<p>Automatic data stream creation requires a matching index template with data stream enabled.</p>
|
|
1159
|
+
<p><strong>Automatically create data streams and indices</strong></p>
|
|
1160
|
+
<p>If the request's target doesn't exist and matches an index template with a <code>data_stream</code> definition, the index operation automatically creates the data stream.</p>
|
|
1161
|
+
<p>If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates.</p>
|
|
1162
|
+
<p>NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation.</p>
|
|
1163
|
+
<p>If no mapping exists, the index operation creates a dynamic mapping.
|
|
1164
|
+
By default, new fields and objects are automatically added to the mapping if needed.</p>
|
|
1165
|
+
<p>Automatic index creation is controlled by the <code>action.auto_create_index</code> setting.
|
|
1166
|
+
If it is <code>true</code>, any index can be created automatically.
|
|
1167
|
+
You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to <code>false</code> to turn off automatic index creation entirely.
|
|
1168
|
+
Specify a comma-separated list of patterns you want to allow or prefix each pattern with <code>+</code> or <code>-</code> to indicate whether it should be allowed or blocked.
|
|
1169
|
+
When a list is specified, the default behaviour is to disallow.</p>
|
|
1170
|
+
<p>NOTE: The <code>action.auto_create_index</code> setting affects the automatic creation of indices only.
|
|
1171
|
+
It does not affect the creation of data streams.</p>
|
|
1172
|
+
<p><strong>Routing</strong></p>
|
|
1173
|
+
<p>By default, shard placement — or routing — is controlled by using a hash of the document's ID value.
|
|
1174
|
+
For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the <code>routing</code> parameter.</p>
|
|
1175
|
+
<p>When setting up explicit mapping, you can also use the <code>_routing</code> field to direct the index operation to extract the routing value from the document itself.
|
|
1176
|
+
This does come at the (very minimal) cost of an additional document parsing pass.
|
|
1177
|
+
If the <code>_routing</code> mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted.</p>
|
|
1178
|
+
<p>NOTE: Data streams do not support custom routing unless they were created with the <code>allow_custom_routing</code> setting enabled in the template.</p>
|
|
1179
|
+
<p><strong>Distributed</strong></p>
|
|
1180
|
+
<p>The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard.
|
|
1181
|
+
After the primary shard completes the operation, if needed, the update is distributed to applicable replicas.</p>
|
|
1182
|
+
<p><strong>Active shards</strong></p>
|
|
1183
|
+
<p>To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation.
|
|
1184
|
+
If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs.
|
|
1185
|
+
By default, write operations only wait for the primary shards to be active before proceeding (that is to say <code>wait_for_active_shards</code> is <code>1</code>).
|
|
1186
|
+
This default can be overridden in the index settings dynamically by setting <code>index.write.wait_for_active_shards</code>.
|
|
1187
|
+
To alter this behavior per operation, use the <code>wait_for_active_shards</code> request parameter.</p>
|
|
1188
|
+
<p>Valid values are all or any positive integer up to the total number of configured copies per shard in the index (which is <code>number_of_replicas</code>+1).
|
|
1189
|
+
Specifying a negative value or a number greater than the number of shard copies will throw an error.</p>
|
|
1190
|
+
<p>For example, suppose you have a cluster of three nodes, A, B, and C and you create an index named <code>index</code> with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes).
|
|
1191
|
+
If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding.
|
|
1192
|
+
This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data.
|
|
1193
|
+
If <code>wait_for_active_shards</code> is set on the request to <code>3</code> (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding.
|
|
1194
|
+
This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard.
|
|
1195
|
+
However, if you set <code>wait_for_active_shards</code> to <code>all</code> (or to <code>4</code>, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index.
|
|
1196
|
+
The operation will timeout unless a new node is brought up in the cluster to host the fourth copy of the shard.</p>
|
|
1197
|
+
<p>It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts.
|
|
1198
|
+
After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary.
|
|
1199
|
+
The <code>_shards</code> section of the API response reveals the number of shard copies on which replication succeeded and failed.</p>
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-index_.html>`_
|
|
1203
|
+
|
|
1204
|
+
:param index: The name of the data stream or index to target. If the target doesn't
|
|
1127
1205
|
exist and matches the name or wildcard (`*`) pattern of an index template
|
|
1128
1206
|
with a `data_stream` definition, this request creates the data stream. If
|
|
1129
|
-
the target doesn
|
|
1207
|
+
the target doesn't exist and doesn't match a data stream template, this request
|
|
1130
1208
|
creates the index.
|
|
1131
|
-
:param id:
|
|
1209
|
+
:param id: A unique identifier for the document. To automatically generate a
|
|
1210
|
+
document ID, use the `POST /<target>/_doc/` request format.
|
|
1132
1211
|
:param document:
|
|
1133
|
-
:param
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1212
|
+
:param if_primary_term: Only perform the operation if the document has this primary
|
|
1213
|
+
term.
|
|
1214
|
+
:param if_seq_no: Only perform the operation if the document has this sequence
|
|
1215
|
+
number.
|
|
1216
|
+
:param include_source_on_error: Whether to include the document source
|
|
1217
|
+
in the error message in case of parsing errors.
|
|
1218
|
+
:param op_type: Set to `create` to only index the document if it does not already
|
|
1219
|
+
exist (put if absent). If a document with the specified `_id` already exists,
|
|
1220
|
+
the indexing operation will fail. The behavior is the same as using the `<index>/_create`
|
|
1221
|
+
endpoint. If a document ID is specified, this parameter defaults to `index`.
|
|
1222
|
+
Otherwise, it defaults to `create`. If the request targets a data stream,
|
|
1223
|
+
an `op_type` of `create` is required.
|
|
1224
|
+
:param pipeline: The ID of the pipeline to use to preprocess incoming documents.
|
|
1225
|
+
If the index has a default ingest pipeline specified, setting the value to
|
|
1226
|
+
`_none` turns off the default ingest pipeline for this request. If a final
|
|
1227
|
+
pipeline is configured, it will always run regardless of the value of this
|
|
1137
1228
|
parameter.
|
|
1138
1229
|
:param refresh: If `true`, Elasticsearch refreshes the affected shards to make
|
|
1139
|
-
this operation visible to search
|
|
1140
|
-
make this operation visible to search
|
|
1141
|
-
|
|
1142
|
-
:param
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1230
|
+
this operation visible to search. If `wait_for`, it waits for a refresh to
|
|
1231
|
+
make this operation visible to search. If `false`, it does nothing with refreshes.
|
|
1232
|
+
:param require_alias: If `true`, the destination must be an index alias.
|
|
1233
|
+
:param require_data_stream: If `true`, the request's actions must target a data
|
|
1234
|
+
stream (existing or to be created).
|
|
1235
|
+
:param routing: A custom value that is used to route operations to a specific
|
|
1236
|
+
shard.
|
|
1237
|
+
:param timeout: The period the request waits for the following operations: automatic
|
|
1238
|
+
index creation, dynamic mapping updates, waiting for active shards. Elasticsearch
|
|
1239
|
+
waits for at least the specified timeout period before failing. The actual
|
|
1240
|
+
wait time could be longer, particularly when multiple waits occur. This parameter
|
|
1241
|
+
is useful for situations where the primary shard assigned to perform the
|
|
1242
|
+
operation might not be available when the operation runs. Some reasons for
|
|
1243
|
+
this might be that the primary shard is currently recovering from a gateway
|
|
1244
|
+
or undergoing relocation. By default, the operation will wait on the primary
|
|
1245
|
+
shard to become available for at least 1 minute before failing and responding
|
|
1246
|
+
with an error. The actual wait time could be longer, particularly when multiple
|
|
1247
|
+
waits occur.
|
|
1248
|
+
:param version: The explicit version number for concurrency control. It must
|
|
1249
|
+
be a non-negative long number.
|
|
1250
|
+
:param version_type: The version type.
|
|
1149
1251
|
:param wait_for_active_shards: The number of shard copies that must be active
|
|
1150
|
-
before proceeding with the operation.
|
|
1151
|
-
up to the total number of shards in the index (`number_of_replicas+1`).
|
|
1252
|
+
before proceeding with the operation. You can set it to `all` or any positive
|
|
1253
|
+
integer up to the total number of shards in the index (`number_of_replicas+1`).
|
|
1254
|
+
The default value of `1` means it waits for each primary shard to be active.
|
|
1152
1255
|
"""
|
|
1153
1256
|
if index in SKIP_IN_PATH:
|
|
1154
1257
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -1169,12 +1272,24 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1169
1272
|
__query["filter_path"] = filter_path
|
|
1170
1273
|
if human is not None:
|
|
1171
1274
|
__query["human"] = human
|
|
1275
|
+
if if_primary_term is not None:
|
|
1276
|
+
__query["if_primary_term"] = if_primary_term
|
|
1277
|
+
if if_seq_no is not None:
|
|
1278
|
+
__query["if_seq_no"] = if_seq_no
|
|
1279
|
+
if include_source_on_error is not None:
|
|
1280
|
+
__query["include_source_on_error"] = include_source_on_error
|
|
1281
|
+
if op_type is not None:
|
|
1282
|
+
__query["op_type"] = op_type
|
|
1172
1283
|
if pipeline is not None:
|
|
1173
1284
|
__query["pipeline"] = pipeline
|
|
1174
1285
|
if pretty is not None:
|
|
1175
1286
|
__query["pretty"] = pretty
|
|
1176
1287
|
if refresh is not None:
|
|
1177
1288
|
__query["refresh"] = refresh
|
|
1289
|
+
if require_alias is not None:
|
|
1290
|
+
__query["require_alias"] = require_alias
|
|
1291
|
+
if require_data_stream is not None:
|
|
1292
|
+
__query["require_data_stream"] = require_data_stream
|
|
1178
1293
|
if routing is not None:
|
|
1179
1294
|
__query["routing"] = routing
|
|
1180
1295
|
if timeout is not None:
|
|
@@ -1223,29 +1338,60 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1223
1338
|
] = None,
|
|
1224
1339
|
) -> ObjectApiResponse[t.Any]:
|
|
1225
1340
|
"""
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1341
|
+
.. raw:: html
|
|
1342
|
+
|
|
1343
|
+
<p>Delete a document.</p>
|
|
1344
|
+
<p>Remove a JSON document from the specified index.</p>
|
|
1345
|
+
<p>NOTE: You cannot send deletion requests directly to a data stream.
|
|
1346
|
+
To delete a document in a data stream, you must target the backing index containing the document.</p>
|
|
1347
|
+
<p><strong>Optimistic concurrency control</strong></p>
|
|
1348
|
+
<p>Delete operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the <code>if_seq_no</code> and <code>if_primary_term</code> parameters.
|
|
1349
|
+
If a mismatch is detected, the operation will result in a <code>VersionConflictException</code> and a status code of <code>409</code>.</p>
|
|
1350
|
+
<p><strong>Versioning</strong></p>
|
|
1351
|
+
<p>Each document indexed is versioned.
|
|
1352
|
+
When deleting a document, the version can be specified to make sure the relevant document you are trying to delete is actually being deleted and it has not changed in the meantime.
|
|
1353
|
+
Every write operation run on a document, deletes included, causes its version to be incremented.
|
|
1354
|
+
The version number of a deleted document remains available for a short time after deletion to allow for control of concurrent operations.
|
|
1355
|
+
The length of time for which a deleted document's version remains available is determined by the <code>index.gc_deletes</code> index setting.</p>
|
|
1356
|
+
<p><strong>Routing</strong></p>
|
|
1357
|
+
<p>If routing is used during indexing, the routing value also needs to be specified to delete a document.</p>
|
|
1358
|
+
<p>If the <code>_routing</code> mapping is set to <code>required</code> and no routing value is specified, the delete API throws a <code>RoutingMissingException</code> and rejects the request.</p>
|
|
1359
|
+
<p>For example:</p>
|
|
1360
|
+
<pre><code>DELETE /my-index-000001/_doc/1?routing=shard-1
|
|
1361
|
+
</code></pre>
|
|
1362
|
+
<p>This request deletes the document with ID 1, but it is routed based on the user.
|
|
1363
|
+
The document is not deleted if the correct routing is not specified.</p>
|
|
1364
|
+
<p><strong>Distributed</strong></p>
|
|
1365
|
+
<p>The delete operation gets hashed into a specific shard ID.
|
|
1366
|
+
It then gets redirected into the primary shard within that ID group and replicated (if needed) to shard replicas within that ID group.</p>
|
|
1367
|
+
|
|
1368
|
+
|
|
1369
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-delete.html>`_
|
|
1370
|
+
|
|
1371
|
+
:param index: The name of the target index.
|
|
1372
|
+
:param id: A unique identifier for the document.
|
|
1232
1373
|
:param if_primary_term: Only perform the operation if the document has this primary
|
|
1233
1374
|
term.
|
|
1234
1375
|
:param if_seq_no: Only perform the operation if the document has this sequence
|
|
1235
1376
|
number.
|
|
1236
1377
|
:param refresh: If `true`, Elasticsearch refreshes the affected shards to make
|
|
1237
|
-
this operation visible to search
|
|
1238
|
-
make this operation visible to search
|
|
1239
|
-
|
|
1240
|
-
:param
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1378
|
+
this operation visible to search. If `wait_for`, it waits for a refresh to
|
|
1379
|
+
make this operation visible to search. If `false`, it does nothing with refreshes.
|
|
1380
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
1381
|
+
:param timeout: The period to wait for active shards. This parameter is useful
|
|
1382
|
+
for situations where the primary shard assigned to perform the delete operation
|
|
1383
|
+
might not be available when the delete operation runs. Some reasons for this
|
|
1384
|
+
might be that the primary shard is currently recovering from a store or undergoing
|
|
1385
|
+
relocation. By default, the delete operation will wait on the primary shard
|
|
1386
|
+
to become available for up to 1 minute before failing and responding with
|
|
1387
|
+
an error.
|
|
1388
|
+
:param version: An explicit version number for concurrency control. It must match
|
|
1389
|
+
the current version of the document for the request to succeed.
|
|
1390
|
+
:param version_type: The version type.
|
|
1391
|
+
:param wait_for_active_shards: The minimum number of shard copies that must be
|
|
1392
|
+
active before proceeding with the operation. You can set it to `all` or any
|
|
1393
|
+
positive integer up to the total number of shards in the index (`number_of_replicas+1`).
|
|
1394
|
+
The default value of `1` means it waits for each primary shard to be active.
|
|
1249
1395
|
"""
|
|
1250
1396
|
if index in SKIP_IN_PATH:
|
|
1251
1397
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -1345,72 +1491,148 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1345
1491
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1346
1492
|
) -> ObjectApiResponse[t.Any]:
|
|
1347
1493
|
"""
|
|
1348
|
-
|
|
1494
|
+
.. raw:: html
|
|
1495
|
+
|
|
1496
|
+
<p>Delete documents.</p>
|
|
1497
|
+
<p>Deletes documents that match the specified query.</p>
|
|
1498
|
+
<p>If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:</p>
|
|
1499
|
+
<ul>
|
|
1500
|
+
<li><code>read</code></li>
|
|
1501
|
+
<li><code>delete</code> or <code>write</code></li>
|
|
1502
|
+
</ul>
|
|
1503
|
+
<p>You can specify the query criteria in the request URI or the request body using the same syntax as the search API.
|
|
1504
|
+
When you submit a delete by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and deletes matching documents using internal versioning.
|
|
1505
|
+
If a document changes between the time that the snapshot is taken and the delete operation is processed, it results in a version conflict and the delete operation fails.</p>
|
|
1506
|
+
<p>NOTE: Documents with a version equal to 0 cannot be deleted using delete by query because internal versioning does not support 0 as a valid version number.</p>
|
|
1507
|
+
<p>While processing a delete by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents to delete.
|
|
1508
|
+
A bulk delete request is performed for each batch of matching documents.
|
|
1509
|
+
If a search or bulk request is rejected, the requests are retried up to 10 times, with exponential back off.
|
|
1510
|
+
If the maximum retry limit is reached, processing halts and all failed requests are returned in the response.
|
|
1511
|
+
Any delete requests that completed successfully still stick; they are not rolled back.</p>
|
|
1512
|
+
<p>You can opt to count version conflicts instead of halting and returning by setting <code>conflicts</code> to <code>proceed</code>.
|
|
1513
|
+
Note that if you opt to count version conflicts the operation could attempt to delete more documents from the source than <code>max_docs</code> until it has successfully deleted <code>max_docs</code> documents, or it has gone through every document in the source query.</p>
|
|
1514
|
+
<p><strong>Throttling delete requests</strong></p>
|
|
1515
|
+
<p>To control the rate at which delete by query issues batches of delete operations, you can set <code>requests_per_second</code> to any positive decimal number.
|
|
1516
|
+
This pads each batch with a wait time to throttle the rate.
|
|
1517
|
+
Set <code>requests_per_second</code> to <code>-1</code> to disable throttling.</p>
|
|
1518
|
+
<p>Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
|
|
1519
|
+
The padding time is the difference between the batch size divided by the <code>requests_per_second</code> and the time spent writing.
|
|
1520
|
+
By default the batch size is <code>1000</code>, so if <code>requests_per_second</code> is set to <code>500</code>:</p>
|
|
1521
|
+
<pre><code>target_time = 1000 / 500 per second = 2 seconds
|
|
1522
|
+
wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
|
|
1523
|
+
</code></pre>
|
|
1524
|
+
<p>Since the batch is issued as a single <code>_bulk</code> request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set.
|
|
1525
|
+
This is "bursty" instead of "smooth".</p>
|
|
1526
|
+
<p><strong>Slicing</strong></p>
|
|
1527
|
+
<p>Delete by query supports sliced scroll to parallelize the delete process.
|
|
1528
|
+
This can improve efficiency and provide a convenient way to break the request down into smaller parts.</p>
|
|
1529
|
+
<p>Setting <code>slices</code> to <code>auto</code> lets Elasticsearch choose the number of slices to use.
|
|
1530
|
+
This setting will use one slice per shard, up to a certain limit.
|
|
1531
|
+
If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.
|
|
1532
|
+
Adding slices to the delete by query operation creates sub-requests which means it has some quirks:</p>
|
|
1533
|
+
<ul>
|
|
1534
|
+
<li>You can see these requests in the tasks APIs. These sub-requests are "child" tasks of the task for the request with slices.</li>
|
|
1535
|
+
<li>Fetching the status of the task for the request with slices only contains the status of completed slices.</li>
|
|
1536
|
+
<li>These sub-requests are individually addressable for things like cancellation and rethrottling.</li>
|
|
1537
|
+
<li>Rethrottling the request with <code>slices</code> will rethrottle the unfinished sub-request proportionally.</li>
|
|
1538
|
+
<li>Canceling the request with <code>slices</code> will cancel each sub-request.</li>
|
|
1539
|
+
<li>Due to the nature of <code>slices</code> each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.</li>
|
|
1540
|
+
<li>Parameters like <code>requests_per_second</code> and <code>max_docs</code> on a request with <code>slices</code> are distributed proportionally to each sub-request. Combine that with the earlier point about distribution being uneven and you should conclude that using <code>max_docs</code> with <code>slices</code> might not result in exactly <code>max_docs</code> documents being deleted.</li>
|
|
1541
|
+
<li>Each sub-request gets a slightly different snapshot of the source data stream or index though these are all taken at approximately the same time.</li>
|
|
1542
|
+
</ul>
|
|
1543
|
+
<p>If you're slicing manually or otherwise tuning automatic slicing, keep in mind that:</p>
|
|
1544
|
+
<ul>
|
|
1545
|
+
<li>Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many <code>slices</code> hurts performance. Setting <code>slices</code> higher than the number of shards generally does not improve efficiency and adds overhead.</li>
|
|
1546
|
+
<li>Delete performance scales linearly across available resources with the number of slices.</li>
|
|
1547
|
+
</ul>
|
|
1548
|
+
<p>Whether query or delete performance dominates the runtime depends on the documents being deleted and cluster resources.</p>
|
|
1549
|
+
<p><strong>Cancel a delete by query operation</strong></p>
|
|
1550
|
+
<p>Any delete by query can be canceled using the task cancel API. For example:</p>
|
|
1551
|
+
<pre><code>POST _tasks/r1A2WoRbTwKZ516z6NEs5A:36619/_cancel
|
|
1552
|
+
</code></pre>
|
|
1553
|
+
<p>The task ID can be found by using the get tasks API.</p>
|
|
1554
|
+
<p>Cancellation should happen quickly but might take a few seconds.
|
|
1555
|
+
The get task status API will continue to list the delete by query task until this task checks that it has been cancelled and terminates itself.</p>
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-delete-by-query.html>`_
|
|
1349
1559
|
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
Supports wildcards (`*`). To search all data streams or indices, omit this
|
|
1354
|
-
parameter or use `*` or `_all`.
|
|
1560
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
1561
|
+
search. It supports wildcards (`*`). To search all data streams or indices,
|
|
1562
|
+
omit this parameter or use `*` or `_all`.
|
|
1355
1563
|
:param allow_no_indices: If `false`, the request returns an error if any wildcard
|
|
1356
1564
|
expression, index alias, or `_all` value targets only missing or closed indices.
|
|
1357
1565
|
This behavior applies even if the request targets other open indices. For
|
|
1358
1566
|
example, a request targeting `foo*,bar*` returns an error if an index starts
|
|
1359
1567
|
with `foo` but no index starts with `bar`.
|
|
1360
1568
|
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
|
|
1361
|
-
|
|
1569
|
+
This parameter can be used only when the `q` query string parameter is specified.
|
|
1570
|
+
:param analyzer: Analyzer to use for the query string. This parameter can be
|
|
1571
|
+
used only when the `q` query string parameter is specified.
|
|
1362
1572
|
:param conflicts: What to do if delete by query hits version conflicts: `abort`
|
|
1363
1573
|
or `proceed`.
|
|
1364
1574
|
:param default_operator: The default operator for query string query: `AND` or
|
|
1365
|
-
`OR`.
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1575
|
+
`OR`. This parameter can be used only when the `q` query string parameter
|
|
1576
|
+
is specified.
|
|
1577
|
+
:param df: The field to use as default where no field prefix is given in the
|
|
1578
|
+
query string. This parameter can be used only when the `q` query string parameter
|
|
1579
|
+
is specified.
|
|
1580
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
1581
|
+
If the request can target data streams, this argument determines whether
|
|
1582
|
+
wildcard expressions match hidden data streams. It supports comma-separated
|
|
1583
|
+
values, such as `open,hidden`.
|
|
1584
|
+
:param from_: Skips the specified number of documents.
|
|
1373
1585
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
1374
1586
|
a missing or closed index.
|
|
1375
1587
|
:param lenient: If `true`, format-based query failures (such as providing text
|
|
1376
|
-
to a numeric field) in the query string will be ignored.
|
|
1588
|
+
to a numeric field) in the query string will be ignored. This parameter can
|
|
1589
|
+
be used only when the `q` query string parameter is specified.
|
|
1377
1590
|
:param max_docs: The maximum number of documents to delete.
|
|
1378
|
-
:param preference:
|
|
1379
|
-
|
|
1380
|
-
:param q:
|
|
1381
|
-
:param query:
|
|
1591
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
1592
|
+
is random by default.
|
|
1593
|
+
:param q: A query in the Lucene query string syntax.
|
|
1594
|
+
:param query: The documents to delete specified with Query DSL.
|
|
1382
1595
|
:param refresh: If `true`, Elasticsearch refreshes all shards involved in the
|
|
1383
|
-
delete by query after the request completes.
|
|
1596
|
+
delete by query after the request completes. This is different than the delete
|
|
1597
|
+
API's `refresh` parameter, which causes just the shard that received the
|
|
1598
|
+
delete request to be refreshed. Unlike the delete API, it does not support
|
|
1599
|
+
`wait_for`.
|
|
1384
1600
|
:param request_cache: If `true`, the request cache is used for this request.
|
|
1385
1601
|
Defaults to the index-level setting.
|
|
1386
1602
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
1387
1603
|
second.
|
|
1388
|
-
:param routing:
|
|
1389
|
-
:param scroll:
|
|
1390
|
-
:param scroll_size:
|
|
1391
|
-
:param search_timeout:
|
|
1392
|
-
no timeout.
|
|
1393
|
-
:param search_type: The type of the search operation. Available options
|
|
1394
|
-
`dfs_query_then_fetch`.
|
|
1604
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
1605
|
+
:param scroll: The period to retain the search context for scrolling.
|
|
1606
|
+
:param scroll_size: The size of the scroll request that powers the operation.
|
|
1607
|
+
:param search_timeout: The explicit timeout for each search request. It defaults
|
|
1608
|
+
to no timeout.
|
|
1609
|
+
:param search_type: The type of the search operation. Available options include
|
|
1610
|
+
`query_then_fetch` and `dfs_query_then_fetch`.
|
|
1395
1611
|
:param slice: Slice the request manually using the provided slice ID and total
|
|
1396
1612
|
number of slices.
|
|
1397
1613
|
:param slices: The number of slices this task should be divided into.
|
|
1398
|
-
:param sort: A comma-separated list of
|
|
1399
|
-
:param stats:
|
|
1400
|
-
:param terminate_after:
|
|
1614
|
+
:param sort: A comma-separated list of `<field>:<direction>` pairs.
|
|
1615
|
+
:param stats: The specific `tag` of the request for logging and statistical purposes.
|
|
1616
|
+
:param terminate_after: The maximum number of documents to collect for each shard.
|
|
1401
1617
|
If a query reaches this limit, Elasticsearch terminates the query early.
|
|
1402
1618
|
Elasticsearch collects documents before sorting. Use with caution. Elasticsearch
|
|
1403
1619
|
applies this parameter to each shard handling the request. When possible,
|
|
1404
1620
|
let Elasticsearch perform early termination automatically. Avoid specifying
|
|
1405
1621
|
this parameter for requests that target data streams with backing indices
|
|
1406
1622
|
across multiple data tiers.
|
|
1407
|
-
:param timeout:
|
|
1623
|
+
:param timeout: The period each deletion request waits for active shards.
|
|
1408
1624
|
:param version: If `true`, returns the document version as part of a hit.
|
|
1409
1625
|
:param wait_for_active_shards: The number of shard copies that must be active
|
|
1410
|
-
before proceeding with the operation. Set to all or any positive integer
|
|
1411
|
-
up to the total number of shards in the index (`number_of_replicas+1`).
|
|
1626
|
+
before proceeding with the operation. Set to `all` or any positive integer
|
|
1627
|
+
up to the total number of shards in the index (`number_of_replicas+1`). The
|
|
1628
|
+
`timeout` value controls how long each write request waits for unavailable
|
|
1629
|
+
shards to become available.
|
|
1412
1630
|
:param wait_for_completion: If `true`, the request blocks until the operation
|
|
1413
|
-
is complete.
|
|
1631
|
+
is complete. If `false`, Elasticsearch performs some preflight checks, launches
|
|
1632
|
+
the request, and returns a task you can use to cancel or get the status of
|
|
1633
|
+
the task. Elasticsearch creates a record of this task as a document at `.tasks/task/${taskId}`.
|
|
1634
|
+
When you are done with a task, you should delete the task document so Elasticsearch
|
|
1635
|
+
can reclaim the space.
|
|
1414
1636
|
"""
|
|
1415
1637
|
if index in SKIP_IN_PATH:
|
|
1416
1638
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -1523,16 +1745,18 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1523
1745
|
requests_per_second: t.Optional[float] = None,
|
|
1524
1746
|
) -> ObjectApiResponse[t.Any]:
|
|
1525
1747
|
"""
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1748
|
+
.. raw:: html
|
|
1749
|
+
|
|
1750
|
+
<p>Throttle a delete by query operation.</p>
|
|
1751
|
+
<p>Change the number of requests per second for a particular delete by query operation.
|
|
1752
|
+
Rethrottling that speeds up the query takes effect immediately but rethrottling that slows down the query takes effect after completing the current batch to prevent scroll timeouts.</p>
|
|
1530
1753
|
|
|
1531
|
-
|
|
1754
|
+
|
|
1755
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-delete-by-query.html#docs-delete-by-query-rethrottle>`_
|
|
1532
1756
|
|
|
1533
1757
|
:param task_id: The ID for the task.
|
|
1534
1758
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
1535
|
-
second.
|
|
1759
|
+
second. To disable throttling, set it to `-1`.
|
|
1536
1760
|
"""
|
|
1537
1761
|
if task_id in SKIP_IN_PATH:
|
|
1538
1762
|
raise ValueError("Empty value passed for parameter 'task_id'")
|
|
@@ -1572,16 +1796,22 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1572
1796
|
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1573
1797
|
) -> ObjectApiResponse[t.Any]:
|
|
1574
1798
|
"""
|
|
1575
|
-
|
|
1799
|
+
.. raw:: html
|
|
1576
1800
|
|
|
1577
|
-
|
|
1801
|
+
<p>Delete a script or search template.
|
|
1802
|
+
Deletes a stored script or search template.</p>
|
|
1578
1803
|
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
:param
|
|
1584
|
-
the timeout expires, the request fails
|
|
1804
|
+
|
|
1805
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/delete-stored-script-api.html>`_
|
|
1806
|
+
|
|
1807
|
+
:param id: The identifier for the stored script or search template.
|
|
1808
|
+
:param master_timeout: The period to wait for a connection to the master node.
|
|
1809
|
+
If no response is received before the timeout expires, the request fails
|
|
1810
|
+
and returns an error. It can also be set to `-1` to indicate that the request
|
|
1811
|
+
should never time out.
|
|
1812
|
+
:param timeout: The period to wait for a response. If no response is received
|
|
1813
|
+
before the timeout expires, the request fails and returns an error. It can
|
|
1814
|
+
also be set to `-1` to indicate that the request should never time out.
|
|
1585
1815
|
"""
|
|
1586
1816
|
if id in SKIP_IN_PATH:
|
|
1587
1817
|
raise ValueError("Empty value passed for parameter 'id'")
|
|
@@ -1640,32 +1870,60 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1640
1870
|
] = None,
|
|
1641
1871
|
) -> HeadApiResponse:
|
|
1642
1872
|
"""
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1873
|
+
.. raw:: html
|
|
1874
|
+
|
|
1875
|
+
<p>Check a document.</p>
|
|
1876
|
+
<p>Verify that a document exists.
|
|
1877
|
+
For example, check to see if a document with the <code>_id</code> 0 exists:</p>
|
|
1878
|
+
<pre><code>HEAD my-index-000001/_doc/0
|
|
1879
|
+
</code></pre>
|
|
1880
|
+
<p>If the document exists, the API returns a status code of <code>200 - OK</code>.
|
|
1881
|
+
If the document doesn’t exist, the API returns <code>404 - Not Found</code>.</p>
|
|
1882
|
+
<p><strong>Versioning support</strong></p>
|
|
1883
|
+
<p>You can use the <code>version</code> parameter to check the document only if its current version is equal to the specified one.</p>
|
|
1884
|
+
<p>Internally, Elasticsearch has marked the old document as deleted and added an entirely new document.
|
|
1885
|
+
The old version of the document doesn't disappear immediately, although you won't be able to access it.
|
|
1886
|
+
Elasticsearch cleans up deleted documents in the background as you continue to index more data.</p>
|
|
1887
|
+
|
|
1888
|
+
|
|
1889
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-get.html>`_
|
|
1890
|
+
|
|
1891
|
+
:param index: A comma-separated list of data streams, indices, and aliases. It
|
|
1892
|
+
supports wildcards (`*`).
|
|
1893
|
+
:param id: A unique document identifier.
|
|
1894
|
+
:param preference: The node or shard the operation should be performed on. By
|
|
1895
|
+
default, the operation is randomized between the shard replicas. If it is
|
|
1896
|
+
set to `_local`, the operation will prefer to be run on a local allocated
|
|
1897
|
+
shard when possible. If it is set to a custom value, the value is used to
|
|
1898
|
+
guarantee that the same shards will be used for the same custom value. This
|
|
1899
|
+
can help with "jumping values" when hitting different shards in different
|
|
1900
|
+
refresh states. A sample value can be something like the web session ID or
|
|
1901
|
+
the user name.
|
|
1652
1902
|
:param realtime: If `true`, the request is real-time as opposed to near-real-time.
|
|
1653
|
-
:param refresh: If `true`,
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
:param
|
|
1659
|
-
the
|
|
1903
|
+
:param refresh: If `true`, the request refreshes the relevant shards before retrieving
|
|
1904
|
+
the document. Setting it to `true` should be done after careful thought and
|
|
1905
|
+
verification that this does not cause a heavy load on the system (and slow
|
|
1906
|
+
down indexing).
|
|
1907
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
1908
|
+
:param source: Indicates whether to return the `_source` field (`true` or `false`)
|
|
1909
|
+
or lists the fields to return.
|
|
1910
|
+
:param source_excludes: A comma-separated list of source fields to exclude from
|
|
1911
|
+
the response. You can also use this parameter to exclude fields from the
|
|
1912
|
+
subset specified in `_source_includes` query parameter. If the `_source`
|
|
1913
|
+
parameter is `false`, this parameter is ignored.
|
|
1660
1914
|
:param source_includes: A comma-separated list of source fields to include in
|
|
1661
|
-
the response.
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1915
|
+
the response. If this parameter is specified, only these source fields are
|
|
1916
|
+
returned. You can exclude fields from this subset using the `_source_excludes`
|
|
1917
|
+
query parameter. If the `_source` parameter is `false`, this parameter is
|
|
1918
|
+
ignored.
|
|
1919
|
+
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
1920
|
+
of a hit. If no fields are specified, no stored fields are included in the
|
|
1921
|
+
response. If this field is specified, the `_source` parameter defaults to
|
|
1922
|
+
`false`.
|
|
1665
1923
|
:param version: Explicit version number for concurrency control. The specified
|
|
1666
1924
|
version must match the current version of the document for the request to
|
|
1667
1925
|
succeed.
|
|
1668
|
-
:param version_type:
|
|
1926
|
+
:param version_type: The version type.
|
|
1669
1927
|
"""
|
|
1670
1928
|
if index in SKIP_IN_PATH:
|
|
1671
1929
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -1741,29 +1999,38 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1741
1999
|
] = None,
|
|
1742
2000
|
) -> HeadApiResponse:
|
|
1743
2001
|
"""
|
|
1744
|
-
|
|
2002
|
+
.. raw:: html
|
|
1745
2003
|
|
|
1746
|
-
|
|
2004
|
+
<p>Check for a document source.</p>
|
|
2005
|
+
<p>Check whether a document source exists in an index.
|
|
2006
|
+
For example:</p>
|
|
2007
|
+
<pre><code>HEAD my-index-000001/_source/1
|
|
2008
|
+
</code></pre>
|
|
2009
|
+
<p>A document's source is not available if it is disabled in the mapping.</p>
|
|
1747
2010
|
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
:param
|
|
1752
|
-
|
|
1753
|
-
:param
|
|
1754
|
-
:param
|
|
1755
|
-
|
|
1756
|
-
:param
|
|
1757
|
-
:param
|
|
1758
|
-
|
|
2011
|
+
|
|
2012
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-get.html>`_
|
|
2013
|
+
|
|
2014
|
+
:param index: A comma-separated list of data streams, indices, and aliases. It
|
|
2015
|
+
supports wildcards (`*`).
|
|
2016
|
+
:param id: A unique identifier for the document.
|
|
2017
|
+
:param preference: The node or shard the operation should be performed on. By
|
|
2018
|
+
default, the operation is randomized between the shard replicas.
|
|
2019
|
+
:param realtime: If `true`, the request is real-time as opposed to near-real-time.
|
|
2020
|
+
:param refresh: If `true`, the request refreshes the relevant shards before retrieving
|
|
2021
|
+
the document. Setting it to `true` should be done after careful thought and
|
|
2022
|
+
verification that this does not cause a heavy load on the system (and slow
|
|
2023
|
+
down indexing).
|
|
2024
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
2025
|
+
:param source: Indicates whether to return the `_source` field (`true` or `false`)
|
|
2026
|
+
or lists the fields to return.
|
|
1759
2027
|
:param source_excludes: A comma-separated list of source fields to exclude in
|
|
1760
2028
|
the response.
|
|
1761
2029
|
:param source_includes: A comma-separated list of source fields to include in
|
|
1762
2030
|
the response.
|
|
1763
|
-
:param version:
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
:param version_type: Specific version type: `external`, `external_gte`.
|
|
2031
|
+
:param version: The version number for concurrency control. It must match the
|
|
2032
|
+
current version of the document for the request to succeed.
|
|
2033
|
+
:param version_type: The version type.
|
|
1767
2034
|
"""
|
|
1768
2035
|
if index in SKIP_IN_PATH:
|
|
1769
2036
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -1841,34 +2108,47 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1841
2108
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1842
2109
|
) -> ObjectApiResponse[t.Any]:
|
|
1843
2110
|
"""
|
|
1844
|
-
|
|
1845
|
-
|
|
2111
|
+
.. raw:: html
|
|
2112
|
+
|
|
2113
|
+
<p>Explain a document match result.
|
|
2114
|
+
Get information about why a specific document matches, or doesn't match, a query.
|
|
2115
|
+
It computes a score explanation for a query and a specific document.</p>
|
|
2116
|
+
|
|
1846
2117
|
|
|
1847
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
2118
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-explain.html>`_
|
|
1848
2119
|
|
|
1849
|
-
:param index: Index names used to limit the request. Only a single index
|
|
1850
|
-
can be provided to this parameter.
|
|
1851
|
-
:param id:
|
|
2120
|
+
:param index: Index names that are used to limit the request. Only a single index
|
|
2121
|
+
name can be provided to this parameter.
|
|
2122
|
+
:param id: The document identifier.
|
|
1852
2123
|
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
|
|
1853
|
-
|
|
1854
|
-
|
|
2124
|
+
This parameter can be used only when the `q` query string parameter is specified.
|
|
2125
|
+
:param analyzer: The analyzer to use for the query string. This parameter can
|
|
2126
|
+
be used only when the `q` query string parameter is specified.
|
|
1855
2127
|
:param default_operator: The default operator for query string query: `AND` or
|
|
1856
|
-
`OR`.
|
|
1857
|
-
|
|
1858
|
-
|
|
2128
|
+
`OR`. This parameter can be used only when the `q` query string parameter
|
|
2129
|
+
is specified.
|
|
2130
|
+
:param df: The field to use as default where no field prefix is given in the
|
|
2131
|
+
query string. This parameter can be used only when the `q` query string parameter
|
|
2132
|
+
is specified.
|
|
1859
2133
|
:param lenient: If `true`, format-based query failures (such as providing text
|
|
1860
|
-
to a numeric field) in the query string will be ignored.
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
2134
|
+
to a numeric field) in the query string will be ignored. This parameter can
|
|
2135
|
+
be used only when the `q` query string parameter is specified.
|
|
2136
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
2137
|
+
is random by default.
|
|
2138
|
+
:param q: The query in the Lucene query string syntax.
|
|
1864
2139
|
:param query: Defines the search definition using the Query DSL.
|
|
1865
|
-
:param routing:
|
|
1866
|
-
:param source: True or false to return the `_source` field or not
|
|
2140
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
2141
|
+
:param source: `true` or `false` to return the `_source` field or not, or a list
|
|
1867
2142
|
of fields to return.
|
|
1868
2143
|
:param source_excludes: A comma-separated list of source fields to exclude from
|
|
1869
|
-
the response.
|
|
2144
|
+
the response. You can also use this parameter to exclude fields from the
|
|
2145
|
+
subset specified in `_source_includes` query parameter. If the `_source`
|
|
2146
|
+
parameter is `false`, this parameter is ignored.
|
|
1870
2147
|
:param source_includes: A comma-separated list of source fields to include in
|
|
1871
|
-
the response.
|
|
2148
|
+
the response. If this parameter is specified, only these source fields are
|
|
2149
|
+
returned. You can exclude fields from this subset using the `_source_excludes`
|
|
2150
|
+
query parameter. If the `_source` parameter is `false`, this parameter is
|
|
2151
|
+
ignored.
|
|
1872
2152
|
:param stored_fields: A comma-separated list of stored fields to return in the
|
|
1873
2153
|
response.
|
|
1874
2154
|
"""
|
|
@@ -1961,15 +2241,18 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1961
2241
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1962
2242
|
) -> ObjectApiResponse[t.Any]:
|
|
1963
2243
|
"""
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
2244
|
+
.. raw:: html
|
|
2245
|
+
|
|
2246
|
+
<p>Get the field capabilities.</p>
|
|
2247
|
+
<p>Get information about the capabilities of fields among multiple indices.</p>
|
|
2248
|
+
<p>For data streams, the API returns field capabilities among the stream’s backing indices.
|
|
2249
|
+
It returns runtime fields like any other field.
|
|
2250
|
+
For example, a runtime field with a type of keyword is returned the same as any other field that belongs to the <code>keyword</code> family.</p>
|
|
2251
|
+
|
|
1969
2252
|
|
|
1970
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
2253
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-field-caps.html>`_
|
|
1971
2254
|
|
|
1972
|
-
:param index:
|
|
2255
|
+
:param index: A comma-separated list of data streams, indices, and aliases used
|
|
1973
2256
|
to limit the request. Supports wildcards (*). To target all data streams
|
|
1974
2257
|
and indices, omit this parameter or use * or _all.
|
|
1975
2258
|
:param allow_no_indices: If false, the request returns an error if any wildcard
|
|
@@ -1977,25 +2260,32 @@ class AsyncElasticsearch(BaseClient):
|
|
|
1977
2260
|
This behavior applies even if the request targets other open indices. For
|
|
1978
2261
|
example, a request targeting `foo*,bar*` returns an error if an index starts
|
|
1979
2262
|
with foo but no index starts with bar.
|
|
1980
|
-
:param expand_wildcards:
|
|
1981
|
-
request can target data streams, this argument determines whether
|
|
1982
|
-
expressions match hidden data streams. Supports comma-separated
|
|
1983
|
-
as `open,hidden`.
|
|
1984
|
-
:param fields:
|
|
1985
|
-
are supported.
|
|
1986
|
-
:param filters:
|
|
2263
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
2264
|
+
If the request can target data streams, this argument determines whether
|
|
2265
|
+
wildcard expressions match hidden data streams. Supports comma-separated
|
|
2266
|
+
values, such as `open,hidden`.
|
|
2267
|
+
:param fields: A list of fields to retrieve capabilities for. Wildcard (`*`)
|
|
2268
|
+
expressions are supported.
|
|
2269
|
+
:param filters: A comma-separated list of filters to apply to the response.
|
|
1987
2270
|
:param ignore_unavailable: If `true`, missing or closed indices are not included
|
|
1988
2271
|
in the response.
|
|
1989
2272
|
:param include_empty_fields: If false, empty fields are not included in the response.
|
|
1990
2273
|
:param include_unmapped: If true, unmapped fields are included in the response.
|
|
1991
|
-
:param index_filter:
|
|
1992
|
-
|
|
1993
|
-
|
|
2274
|
+
:param index_filter: Filter indices if the provided query rewrites to `match_none`
|
|
2275
|
+
on every shard. IMPORTANT: The filtering is done on a best-effort basis,
|
|
2276
|
+
it uses index statistics and mappings to rewrite queries to `match_none`
|
|
2277
|
+
instead of fully running the request. For instance a range query over a date
|
|
2278
|
+
field can rewrite to `match_none` if all documents within a shard (including
|
|
2279
|
+
deleted documents) are outside of the provided range. However, not all queries
|
|
2280
|
+
can rewrite to `match_none` so this API may return an index even if the provided
|
|
2281
|
+
filter matches no document.
|
|
2282
|
+
:param runtime_mappings: Define ad-hoc runtime fields in the request similar
|
|
1994
2283
|
to the way it is done in search requests. These fields exist only as part
|
|
1995
2284
|
of the query and take precedence over fields defined with the same name in
|
|
1996
2285
|
the index mappings.
|
|
1997
|
-
:param types:
|
|
1998
|
-
|
|
2286
|
+
:param types: A comma-separated list of field types to include. Any fields that
|
|
2287
|
+
do not match one of these types will be excluded from the results. It defaults
|
|
2288
|
+
to empty, meaning that all field types are returned.
|
|
1999
2289
|
"""
|
|
2000
2290
|
__path_parts: t.Dict[str, str]
|
|
2001
2291
|
if index not in SKIP_IN_PATH:
|
|
@@ -2081,36 +2371,87 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2081
2371
|
] = None,
|
|
2082
2372
|
) -> ObjectApiResponse[t.Any]:
|
|
2083
2373
|
"""
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2374
|
+
.. raw:: html
|
|
2375
|
+
|
|
2376
|
+
<p>Get a document by its ID.</p>
|
|
2377
|
+
<p>Get a document and its source or stored fields from an index.</p>
|
|
2378
|
+
<p>By default, this API is realtime and is not affected by the refresh rate of the index (when data will become visible for search).
|
|
2379
|
+
In the case where stored fields are requested with the <code>stored_fields</code> parameter and the document has been updated but is not yet refreshed, the API will have to parse and analyze the source to extract the stored fields.
|
|
2380
|
+
To turn off realtime behavior, set the <code>realtime</code> parameter to false.</p>
|
|
2381
|
+
<p><strong>Source filtering</strong></p>
|
|
2382
|
+
<p>By default, the API returns the contents of the <code>_source</code> field unless you have used the <code>stored_fields</code> parameter or the <code>_source</code> field is turned off.
|
|
2383
|
+
You can turn off <code>_source</code> retrieval by using the <code>_source</code> parameter:</p>
|
|
2384
|
+
<pre><code>GET my-index-000001/_doc/0?_source=false
|
|
2385
|
+
</code></pre>
|
|
2386
|
+
<p>If you only need one or two fields from the <code>_source</code>, use the <code>_source_includes</code> or <code>_source_excludes</code> parameters to include or filter out particular fields.
|
|
2387
|
+
This can be helpful with large documents where partial retrieval can save on network overhead.
|
|
2388
|
+
Both parameters take a comma-separated list of fields or wildcard expressions.
|
|
2389
|
+
For example:</p>
|
|
2390
|
+
<pre><code>GET my-index-000001/_doc/0?_source_includes=*.id&_source_excludes=entities
|
|
2391
|
+
</code></pre>
|
|
2392
|
+
<p>If you only want to specify includes, you can use a shorter notation:</p>
|
|
2393
|
+
<pre><code>GET my-index-000001/_doc/0?_source=*.id
|
|
2394
|
+
</code></pre>
|
|
2395
|
+
<p><strong>Routing</strong></p>
|
|
2396
|
+
<p>If routing is used during indexing, the routing value also needs to be specified to retrieve a document.
|
|
2397
|
+
For example:</p>
|
|
2398
|
+
<pre><code>GET my-index-000001/_doc/2?routing=user1
|
|
2399
|
+
</code></pre>
|
|
2400
|
+
<p>This request gets the document with ID 2, but it is routed based on the user.
|
|
2401
|
+
The document is not fetched if the correct routing is not specified.</p>
|
|
2402
|
+
<p><strong>Distributed</strong></p>
|
|
2403
|
+
<p>The GET operation is hashed into a specific shard ID.
|
|
2404
|
+
It is then redirected to one of the replicas within that shard ID and returns the result.
|
|
2405
|
+
The replicas are the primary shard and its replicas within that shard ID group.
|
|
2406
|
+
This means that the more replicas you have, the better your GET scaling will be.</p>
|
|
2407
|
+
<p><strong>Versioning support</strong></p>
|
|
2408
|
+
<p>You can use the <code>version</code> parameter to retrieve the document only if its current version is equal to the specified one.</p>
|
|
2409
|
+
<p>Internally, Elasticsearch has marked the old document as deleted and added an entirely new document.
|
|
2410
|
+
The old version of the document doesn't disappear immediately, although you won't be able to access it.
|
|
2411
|
+
Elasticsearch cleans up deleted documents in the background as you continue to index more data.</p>
|
|
2412
|
+
|
|
2413
|
+
|
|
2414
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-get.html>`_
|
|
2415
|
+
|
|
2416
|
+
:param index: The name of the index that contains the document.
|
|
2417
|
+
:param id: A unique document identifier.
|
|
2418
|
+
:param force_synthetic_source: Indicates whether the request forces synthetic
|
|
2419
|
+
`_source`. Use this parameter to test if the mapping supports synthetic `_source`
|
|
2420
|
+
and to get a sense of the worst case performance. Fetches with this parameter
|
|
2421
|
+
enabled will be slower than enabling synthetic source natively in the index.
|
|
2422
|
+
:param preference: The node or shard the operation should be performed on. By
|
|
2423
|
+
default, the operation is randomized between the shard replicas. If it is
|
|
2424
|
+
set to `_local`, the operation will prefer to be run on a local allocated
|
|
2425
|
+
shard when possible. If it is set to a custom value, the value is used to
|
|
2426
|
+
guarantee that the same shards will be used for the same custom value. This
|
|
2427
|
+
can help with "jumping values" when hitting different shards in different
|
|
2428
|
+
refresh states. A sample value can be something like the web session ID or
|
|
2429
|
+
the user name.
|
|
2097
2430
|
:param realtime: If `true`, the request is real-time as opposed to near-real-time.
|
|
2098
|
-
:param refresh: If true
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
:param
|
|
2104
|
-
the
|
|
2431
|
+
:param refresh: If `true`, the request refreshes the relevant shards before retrieving
|
|
2432
|
+
the document. Setting it to `true` should be done after careful thought and
|
|
2433
|
+
verification that this does not cause a heavy load on the system (and slow
|
|
2434
|
+
down indexing).
|
|
2435
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
2436
|
+
:param source: Indicates whether to return the `_source` field (`true` or `false`)
|
|
2437
|
+
or lists the fields to return.
|
|
2438
|
+
:param source_excludes: A comma-separated list of source fields to exclude from
|
|
2439
|
+
the response. You can also use this parameter to exclude fields from the
|
|
2440
|
+
subset specified in `_source_includes` query parameter. If the `_source`
|
|
2441
|
+
parameter is `false`, this parameter is ignored.
|
|
2105
2442
|
:param source_includes: A comma-separated list of source fields to include in
|
|
2106
|
-
the response.
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
:param
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2443
|
+
the response. If this parameter is specified, only these source fields are
|
|
2444
|
+
returned. You can exclude fields from this subset using the `_source_excludes`
|
|
2445
|
+
query parameter. If the `_source` parameter is `false`, this parameter is
|
|
2446
|
+
ignored.
|
|
2447
|
+
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
2448
|
+
of a hit. If no fields are specified, no stored fields are included in the
|
|
2449
|
+
response. If this field is specified, the `_source` parameter defaults to
|
|
2450
|
+
`false`. Only leaf fields can be retrieved with the `stored_fields` option.
|
|
2451
|
+
Object fields can't be returned; if specified, the request fails.
|
|
2452
|
+
:param version: The version number for concurrency control. It must match the
|
|
2453
|
+
current version of the document for the request to succeed.
|
|
2454
|
+
:param version_type: The version type.
|
|
2114
2455
|
"""
|
|
2115
2456
|
if index in SKIP_IN_PATH:
|
|
2116
2457
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -2171,12 +2512,19 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2171
2512
|
pretty: t.Optional[bool] = None,
|
|
2172
2513
|
) -> ObjectApiResponse[t.Any]:
|
|
2173
2514
|
"""
|
|
2174
|
-
|
|
2515
|
+
.. raw:: html
|
|
2516
|
+
|
|
2517
|
+
<p>Get a script or search template.
|
|
2518
|
+
Retrieves a stored script or search template.</p>
|
|
2175
2519
|
|
|
2176
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.17/modules-scripting.html>`_
|
|
2177
2520
|
|
|
2178
|
-
|
|
2179
|
-
|
|
2521
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-stored-script-api.html>`_
|
|
2522
|
+
|
|
2523
|
+
:param id: The identifier for the stored script or search template.
|
|
2524
|
+
:param master_timeout: The period to wait for the master node. If the master
|
|
2525
|
+
node is not available before the timeout expires, the request fails and returns
|
|
2526
|
+
an error. It can also be set to `-1` to indicate that the request should
|
|
2527
|
+
never time out.
|
|
2180
2528
|
"""
|
|
2181
2529
|
if id in SKIP_IN_PATH:
|
|
2182
2530
|
raise ValueError("Empty value passed for parameter 'id'")
|
|
@@ -2213,9 +2561,13 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2213
2561
|
pretty: t.Optional[bool] = None,
|
|
2214
2562
|
) -> ObjectApiResponse[t.Any]:
|
|
2215
2563
|
"""
|
|
2216
|
-
|
|
2564
|
+
.. raw:: html
|
|
2565
|
+
|
|
2566
|
+
<p>Get script contexts.</p>
|
|
2567
|
+
<p>Get a list of supported script contexts and their methods.</p>
|
|
2217
2568
|
|
|
2218
|
-
|
|
2569
|
+
|
|
2570
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-script-contexts-api.html>`_
|
|
2219
2571
|
"""
|
|
2220
2572
|
__path_parts: t.Dict[str, str] = {}
|
|
2221
2573
|
__path = "/_script_context"
|
|
@@ -2248,9 +2600,13 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2248
2600
|
pretty: t.Optional[bool] = None,
|
|
2249
2601
|
) -> ObjectApiResponse[t.Any]:
|
|
2250
2602
|
"""
|
|
2251
|
-
|
|
2603
|
+
.. raw:: html
|
|
2604
|
+
|
|
2605
|
+
<p>Get script languages.</p>
|
|
2606
|
+
<p>Get a list of available script types, languages, and contexts.</p>
|
|
2607
|
+
|
|
2252
2608
|
|
|
2253
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
2609
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/get-script-languages-api.html>`_
|
|
2254
2610
|
"""
|
|
2255
2611
|
__path_parts: t.Dict[str, str] = {}
|
|
2256
2612
|
__path = "/_script_language"
|
|
@@ -2303,29 +2659,41 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2303
2659
|
] = None,
|
|
2304
2660
|
) -> ObjectApiResponse[t.Any]:
|
|
2305
2661
|
"""
|
|
2306
|
-
|
|
2662
|
+
.. raw:: html
|
|
2307
2663
|
|
|
2308
|
-
|
|
2664
|
+
<p>Get a document's source.</p>
|
|
2665
|
+
<p>Get the source of a document.
|
|
2666
|
+
For example:</p>
|
|
2667
|
+
<pre><code>GET my-index-000001/_source/1
|
|
2668
|
+
</code></pre>
|
|
2669
|
+
<p>You can use the source filtering parameters to control which parts of the <code>_source</code> are returned:</p>
|
|
2670
|
+
<pre><code>GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities
|
|
2671
|
+
</code></pre>
|
|
2309
2672
|
|
|
2310
|
-
|
|
2311
|
-
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
:param
|
|
2315
|
-
:param
|
|
2316
|
-
|
|
2317
|
-
:param
|
|
2318
|
-
:param
|
|
2319
|
-
|
|
2673
|
+
|
|
2674
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-get.html>`_
|
|
2675
|
+
|
|
2676
|
+
:param index: The name of the index that contains the document.
|
|
2677
|
+
:param id: A unique document identifier.
|
|
2678
|
+
:param preference: The node or shard the operation should be performed on. By
|
|
2679
|
+
default, the operation is randomized between the shard replicas.
|
|
2680
|
+
:param realtime: If `true`, the request is real-time as opposed to near-real-time.
|
|
2681
|
+
:param refresh: If `true`, the request refreshes the relevant shards before retrieving
|
|
2682
|
+
the document. Setting it to `true` should be done after careful thought and
|
|
2683
|
+
verification that this does not cause a heavy load on the system (and slow
|
|
2684
|
+
down indexing).
|
|
2685
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
2686
|
+
:param source: Indicates whether to return the `_source` field (`true` or `false`)
|
|
2687
|
+
or lists the fields to return.
|
|
2320
2688
|
:param source_excludes: A comma-separated list of source fields to exclude in
|
|
2321
2689
|
the response.
|
|
2322
2690
|
:param source_includes: A comma-separated list of source fields to include in
|
|
2323
2691
|
the response.
|
|
2324
|
-
:param stored_fields:
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
succeed.
|
|
2328
|
-
:param version_type:
|
|
2692
|
+
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
2693
|
+
of a hit.
|
|
2694
|
+
:param version: The version number for concurrency control. It must match the
|
|
2695
|
+
current version of the document for the request to succeed.
|
|
2696
|
+
:param version_type: The version type.
|
|
2329
2697
|
"""
|
|
2330
2698
|
if index in SKIP_IN_PATH:
|
|
2331
2699
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -2386,28 +2754,24 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2386
2754
|
verbose: t.Optional[bool] = None,
|
|
2387
2755
|
) -> ObjectApiResponse[t.Any]:
|
|
2388
2756
|
"""
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
for health status, set verbose to false to disable the more expensive analysis
|
|
2408
|
-
logic.
|
|
2409
|
-
|
|
2410
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.17/health-api.html>`_
|
|
2757
|
+
.. raw:: html
|
|
2758
|
+
|
|
2759
|
+
<p>Get the cluster health.
|
|
2760
|
+
Get a report with the health status of an Elasticsearch cluster.
|
|
2761
|
+
The report contains a list of indicators that compose Elasticsearch functionality.</p>
|
|
2762
|
+
<p>Each indicator has a health status of: green, unknown, yellow or red.
|
|
2763
|
+
The indicator will provide an explanation and metadata describing the reason for its current health status.</p>
|
|
2764
|
+
<p>The cluster’s status is controlled by the worst indicator status.</p>
|
|
2765
|
+
<p>In the event that an indicator’s status is non-green, a list of impacts may be present in the indicator result which detail the functionalities that are negatively affected by the health issue.
|
|
2766
|
+
Each impact carries with it a severity level, an area of the system that is affected, and a simple description of the impact on the system.</p>
|
|
2767
|
+
<p>Some health indicators can determine the root cause of a health problem and prescribe a set of steps that can be performed in order to improve the health of the system.
|
|
2768
|
+
The root cause and remediation steps are encapsulated in a diagnosis.
|
|
2769
|
+
A diagnosis contains a cause detailing a root cause analysis, an action containing a brief description of the steps to take to fix the problem, the list of affected resources (if applicable), and a detailed step-by-step troubleshooting guide to fix the diagnosed problem.</p>
|
|
2770
|
+
<p>NOTE: The health indicators perform root cause analysis of non-green health statuses. This can be computationally expensive when called frequently.
|
|
2771
|
+
When setting up automated polling of the API for health status, set verbose to false to disable the more expensive analysis logic.</p>
|
|
2772
|
+
|
|
2773
|
+
|
|
2774
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/health-api.html>`_
|
|
2411
2775
|
|
|
2412
2776
|
:param feature: A feature of the cluster, as returned by the top-level health
|
|
2413
2777
|
report API.
|
|
@@ -2462,6 +2826,7 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2462
2826
|
human: t.Optional[bool] = None,
|
|
2463
2827
|
if_primary_term: t.Optional[int] = None,
|
|
2464
2828
|
if_seq_no: t.Optional[int] = None,
|
|
2829
|
+
include_source_on_error: t.Optional[bool] = None,
|
|
2465
2830
|
op_type: t.Optional[t.Union[str, t.Literal["create", "index"]]] = None,
|
|
2466
2831
|
pipeline: t.Optional[str] = None,
|
|
2467
2832
|
pretty: t.Optional[bool] = None,
|
|
@@ -2480,44 +2845,148 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2480
2845
|
] = None,
|
|
2481
2846
|
) -> ObjectApiResponse[t.Any]:
|
|
2482
2847
|
"""
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2848
|
+
.. raw:: html
|
|
2849
|
+
|
|
2850
|
+
<p>Create or update a document in an index.</p>
|
|
2851
|
+
<p>Add a JSON document to the specified data stream or index and make it searchable.
|
|
2852
|
+
If the target is an index and the document already exists, the request updates the document and increments its version.</p>
|
|
2853
|
+
<p>NOTE: You cannot use this API to send update requests for existing documents in a data stream.</p>
|
|
2854
|
+
<p>If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:</p>
|
|
2855
|
+
<ul>
|
|
2856
|
+
<li>To add or overwrite a document using the <code>PUT /&lt;target&gt;/_doc/&lt;_id&gt;</code> request format, you must have the <code>create</code>, <code>index</code>, or <code>write</code> index privilege.</li>
|
|
2857
|
+
<li>To add a document using the <code>POST /&lt;target&gt;/_doc/</code> request format, you must have the <code>create_doc</code>, <code>create</code>, <code>index</code>, or <code>write</code> index privilege.</li>
|
|
2858
|
+
<li>To automatically create a data stream or index with this API request, you must have the <code>auto_configure</code>, <code>create_index</code>, or <code>manage</code> index privilege.</li>
|
|
2859
|
+
</ul>
|
|
2860
|
+
<p>Automatic data stream creation requires a matching index template with data stream enabled.</p>
|
|
2861
|
+
<p>NOTE: Replica shards might not all be started when an indexing operation returns successfully.
|
|
2862
|
+
By default, only the primary is required. Set <code>wait_for_active_shards</code> to change this default behavior.</p>
|
|
2863
|
+
<p><strong>Automatically create data streams and indices</strong></p>
|
|
2864
|
+
<p>If the request's target doesn't exist and matches an index template with a <code>data_stream</code> definition, the index operation automatically creates the data stream.</p>
|
|
2865
|
+
<p>If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates.</p>
|
|
2866
|
+
<p>NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation.</p>
|
|
2867
|
+
<p>If no mapping exists, the index operation creates a dynamic mapping.
|
|
2868
|
+
By default, new fields and objects are automatically added to the mapping if needed.</p>
|
|
2869
|
+
<p>Automatic index creation is controlled by the <code>action.auto_create_index</code> setting.
|
|
2870
|
+
If it is <code>true</code>, any index can be created automatically.
|
|
2871
|
+
You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to <code>false</code> to turn off automatic index creation entirely.
|
|
2872
|
+
Specify a comma-separated list of patterns you want to allow or prefix each pattern with <code>+</code> or <code>-</code> to indicate whether it should be allowed or blocked.
|
|
2873
|
+
When a list is specified, the default behaviour is to disallow.</p>
|
|
2874
|
+
<p>NOTE: The <code>action.auto_create_index</code> setting affects the automatic creation of indices only.
|
|
2875
|
+
It does not affect the creation of data streams.</p>
|
|
2876
|
+
<p><strong>Optimistic concurrency control</strong></p>
|
|
2877
|
+
<p>Index operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the <code>if_seq_no</code> and <code>if_primary_term</code> parameters.
|
|
2878
|
+
If a mismatch is detected, the operation will result in a <code>VersionConflictException</code> and a status code of <code>409</code>.</p>
|
|
2879
|
+
<p><strong>Routing</strong></p>
|
|
2880
|
+
<p>By default, shard placement — or routing — is controlled by using a hash of the document's ID value.
|
|
2881
|
+
For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the <code>routing</code> parameter.</p>
|
|
2882
|
+
<p>When setting up explicit mapping, you can also use the <code>_routing</code> field to direct the index operation to extract the routing value from the document itself.
|
|
2883
|
+
This does come at the (very minimal) cost of an additional document parsing pass.
|
|
2884
|
+
If the <code>_routing</code> mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted.</p>
|
|
2885
|
+
<p>NOTE: Data streams do not support custom routing unless they were created with the <code>allow_custom_routing</code> setting enabled in the template.</p>
|
|
2886
|
+
<p><strong>Distributed</strong></p>
|
|
2887
|
+
<p>The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard.
|
|
2888
|
+
After the primary shard completes the operation, if needed, the update is distributed to applicable replicas.</p>
|
|
2889
|
+
<p><strong>Active shards</strong></p>
|
|
2890
|
+
<p>To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation.
|
|
2891
|
+
If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs.
|
|
2892
|
+
By default, write operations only wait for the primary shards to be active before proceeding (that is to say <code>wait_for_active_shards</code> is <code>1</code>).
|
|
2893
|
+
This default can be overridden in the index settings dynamically by setting <code>index.write.wait_for_active_shards</code>.
|
|
2894
|
+
To alter this behavior per operation, use the <code>wait_for_active_shards</code> request parameter.</p>
|
|
2895
|
+
<p>Valid values are all or any positive integer up to the total number of configured copies per shard in the index (which is <code>number_of_replicas</code>+1).
|
|
2896
|
+
Specifying a negative value or a number greater than the number of shard copies will throw an error.</p>
|
|
2897
|
+
<p>For example, suppose you have a cluster of three nodes, A, B, and C and you create an index named <code>index</code> with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes).
|
|
2898
|
+
If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding.
|
|
2899
|
+
This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data.
|
|
2900
|
+
If <code>wait_for_active_shards</code> is set on the request to <code>3</code> (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding.
|
|
2901
|
+
This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard.
|
|
2902
|
+
However, if you set <code>wait_for_active_shards</code> to <code>all</code> (or to <code>4</code>, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index.
|
|
2903
|
+
The operation will time out unless a new node is brought up in the cluster to host the fourth copy of the shard.</p>
|
|
2904
|
+
<p>It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts.
|
|
2905
|
+
After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary.
|
|
2906
|
+
The <code>_shards</code> section of the API response reveals the number of shard copies on which replication succeeded and failed.</p>
|
|
2907
|
+
<p><strong>No operation (noop) updates</strong></p>
|
|
2908
|
+
<p>When updating a document by using this API, a new version of the document is always created even if the document hasn't changed.
|
|
2909
|
+
If this isn't acceptable, use the <code>_update</code> API with <code>detect_noop</code> set to <code>true</code>.
|
|
2910
|
+
The <code>detect_noop</code> option isn't available on this API because it doesn’t fetch the old source and isn't able to compare it against the new source.</p>
|
|
2911
|
+
<p>There isn't a definitive rule for when noop updates aren't acceptable.
|
|
2912
|
+
It's a combination of lots of factors like how frequently your data source sends updates that are actually noops and how many queries per second Elasticsearch runs on the shard receiving the updates.</p>
|
|
2913
|
+
<p><strong>Versioning</strong></p>
|
|
2914
|
+
<p>Each indexed document is given a version number.
|
|
2915
|
+
By default, internal versioning is used that starts at 1 and increments with each update, deletes included.
|
|
2916
|
+
Optionally, the version number can be set to an external value (for example, if maintained in a database).
|
|
2917
|
+
To enable this functionality, <code>version_type</code> should be set to <code>external</code>.
|
|
2918
|
+
The value provided must be a numeric, long value greater than or equal to 0, and less than around <code>9.2e+18</code>.</p>
|
|
2919
|
+
<p>NOTE: Versioning is completely real time, and is not affected by the near real time aspects of search operations.
|
|
2920
|
+
If no version is provided, the operation runs without any version checks.</p>
|
|
2921
|
+
<p>When using the external version type, the system checks to see if the version number passed to the index request is greater than the version of the currently stored document.
|
|
2922
|
+
If true, the document will be indexed and the new version number used.
|
|
2923
|
+
If the value provided is less than or equal to the stored document's version number, a version conflict will occur and the index operation will fail. For example:</p>
|
|
2924
|
+
<pre><code>PUT my-index-000001/_doc/1?version=2&version_type=external
|
|
2925
|
+
{
|
|
2926
|
+
"user": {
|
|
2927
|
+
"id": "elkbee"
|
|
2928
|
+
}
|
|
2929
|
+
}
|
|
2930
|
+
|
|
2931
|
+
In this example, the operation will succeed since the supplied version of 2 is higher than the current document version of 1.
|
|
2932
|
+
If the document was already updated and its version was set to 2 or higher, the indexing command will fail and result in a conflict (409 HTTP status code).
|
|
2933
|
+
|
|
2934
|
+
A nice side effect is that there is no need to maintain strict ordering of async indexing operations run as a result of changes to a source database, as long as version numbers from the source database are used.
|
|
2935
|
+
Even the simple case of updating the Elasticsearch index using data from a database is simplified if external versioning is used, as only the latest version will be used if the index operations arrive out of order.
|
|
2936
|
+
</code></pre>
|
|
2937
|
+
|
|
2938
|
+
|
|
2939
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-index_.html>`_
|
|
2940
|
+
|
|
2941
|
+
:param index: The name of the data stream or index to target. If the target doesn't
|
|
2942
|
+
exist and matches the name or wildcard (`*`) pattern of an index template
|
|
2943
|
+
with a `data_stream` definition, this request creates the data stream. If
|
|
2944
|
+
the target doesn't exist and doesn't match a data stream template, this request
|
|
2945
|
+
creates the index. You can check for existing targets with the resolve index
|
|
2946
|
+
API.
|
|
2490
2947
|
:param document:
|
|
2491
|
-
:param id:
|
|
2948
|
+
:param id: A unique identifier for the document. To automatically generate a
|
|
2949
|
+
document ID, use the `POST /<target>/_doc/` request format and omit this
|
|
2950
|
+
parameter.
|
|
2492
2951
|
:param if_primary_term: Only perform the operation if the document has this primary
|
|
2493
2952
|
term.
|
|
2494
2953
|
:param if_seq_no: Only perform the operation if the document has this sequence
|
|
2495
2954
|
number.
|
|
2496
|
-
:param
|
|
2955
|
+
:param include_source_on_error: If `true`, the document source is included
|
|
2956
|
+
in the error message in case of parsing errors.
|
|
2957
|
+
:param op_type: Set to `create` to only index the document if it does not already
|
|
2497
2958
|
exist (put if absent). If a document with the specified `_id` already exists,
|
|
2498
|
-
the indexing operation will fail.
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2959
|
+
the indexing operation will fail. The behavior is the same as using the `<index>/_create`
|
|
2960
|
+
endpoint. If a document ID is specified, this parameter defaults to `index`.
|
|
2961
|
+
Otherwise, it defaults to `create`. If the request targets a data stream,
|
|
2962
|
+
an `op_type` of `create` is required.
|
|
2963
|
+
:param pipeline: The ID of the pipeline to use to preprocess incoming documents.
|
|
2502
2964
|
If the index has a default ingest pipeline specified, then setting the value
|
|
2503
2965
|
to `_none` disables the default ingest pipeline for this request. If a final
|
|
2504
2966
|
pipeline is configured it will always run, regardless of the value of this
|
|
2505
2967
|
parameter.
|
|
2506
2968
|
:param refresh: If `true`, Elasticsearch refreshes the affected shards to make
|
|
2507
|
-
this operation visible to search
|
|
2508
|
-
make this operation visible to search
|
|
2509
|
-
Valid values: `true`, `false`, `wait_for`.
|
|
2969
|
+
this operation visible to search. If `wait_for`, it waits for a refresh to
|
|
2970
|
+
make this operation visible to search. If `false`, it does nothing with refreshes.
|
|
2510
2971
|
:param require_alias: If `true`, the destination must be an index alias.
|
|
2511
|
-
:param routing:
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2972
|
+
:param routing: A custom value that is used to route operations to a specific
|
|
2973
|
+
shard.
|
|
2974
|
+
:param timeout: The period the request waits for the following operations: automatic
|
|
2975
|
+
index creation, dynamic mapping updates, waiting for active shards. This
|
|
2976
|
+
parameter is useful for situations where the primary shard assigned to perform
|
|
2977
|
+
the operation might not be available when the operation runs. Some reasons
|
|
2978
|
+
for this might be that the primary shard is currently recovering from a gateway
|
|
2979
|
+
or undergoing relocation. By default, the operation will wait on the primary
|
|
2980
|
+
shard to become available for at least 1 minute before failing and responding
|
|
2981
|
+
with an error. The actual wait time could be longer, particularly when multiple
|
|
2982
|
+
waits occur.
|
|
2983
|
+
:param version: An explicit version number for concurrency control. It must be
|
|
2984
|
+
a non-negative long number.
|
|
2985
|
+
:param version_type: The version type.
|
|
2518
2986
|
:param wait_for_active_shards: The number of shard copies that must be active
|
|
2519
|
-
before proceeding with the operation.
|
|
2520
|
-
up to the total number of shards in the index (`number_of_replicas+1`).
|
|
2987
|
+
before proceeding with the operation. You can set it to `all` or any positive
|
|
2988
|
+
integer up to the total number of shards in the index (`number_of_replicas+1`).
|
|
2989
|
+
The default value of `1` means it waits for each primary shard to be active.
|
|
2521
2990
|
"""
|
|
2522
2991
|
if index in SKIP_IN_PATH:
|
|
2523
2992
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -2549,6 +3018,8 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2549
3018
|
__query["if_primary_term"] = if_primary_term
|
|
2550
3019
|
if if_seq_no is not None:
|
|
2551
3020
|
__query["if_seq_no"] = if_seq_no
|
|
3021
|
+
if include_source_on_error is not None:
|
|
3022
|
+
__query["include_source_on_error"] = include_source_on_error
|
|
2552
3023
|
if op_type is not None:
|
|
2553
3024
|
__query["op_type"] = op_type
|
|
2554
3025
|
if pipeline is not None:
|
|
@@ -2591,9 +3062,13 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2591
3062
|
pretty: t.Optional[bool] = None,
|
|
2592
3063
|
) -> ObjectApiResponse[t.Any]:
|
|
2593
3064
|
"""
|
|
2594
|
-
|
|
3065
|
+
.. raw:: html
|
|
3066
|
+
|
|
3067
|
+
<p>Get cluster info.
|
|
3068
|
+
Get basic build, version, and cluster information.</p>
|
|
3069
|
+
|
|
2595
3070
|
|
|
2596
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
3071
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/rest-api-root.html>`_
|
|
2597
3072
|
"""
|
|
2598
3073
|
__path_parts: t.Dict[str, str] = {}
|
|
2599
3074
|
__path = "/"
|
|
@@ -2648,38 +3123,48 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2648
3123
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
2649
3124
|
) -> ObjectApiResponse[t.Any]:
|
|
2650
3125
|
"""
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
3126
|
+
.. raw:: html
|
|
3127
|
+
|
|
3128
|
+
<p>Run a knn search.</p>
|
|
3129
|
+
<p>NOTE: The kNN search API has been replaced by the <code>knn</code> option in the search API.</p>
|
|
3130
|
+
<p>Perform a k-nearest neighbor (kNN) search on a dense_vector field and return the matching documents.
|
|
3131
|
+
Given a query vector, the API finds the k closest vectors and returns those documents as search hits.</p>
|
|
3132
|
+
<p>Elasticsearch uses the HNSW algorithm to support efficient kNN search.
|
|
3133
|
+
Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved search speed.
|
|
3134
|
+
This means the results returned are not always the true k closest neighbors.</p>
|
|
3135
|
+
<p>The kNN search API supports restricting the search using a filter.
|
|
3136
|
+
The search will return the top k documents that also match the filter query.</p>
|
|
3137
|
+
<p>A kNN search response has the exact same structure as a search API response.
|
|
3138
|
+
However, certain sections have a meaning specific to kNN search:</p>
|
|
3139
|
+
<ul>
|
|
3140
|
+
<li>The document <code>_score</code> is determined by the similarity between the query and document vector.</li>
|
|
3141
|
+
<li>The <code>hits.total</code> object contains the total number of nearest neighbor candidates considered, which is <code>num_candidates * num_shards</code>. The <code>hits.total.relation</code> will always be <code>eq</code>, indicating an exact value.</li>
|
|
3142
|
+
</ul>
|
|
3143
|
+
|
|
3144
|
+
|
|
3145
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/knn-search-api.html>`_
|
|
2662
3146
|
|
|
2663
3147
|
:param index: A comma-separated list of index names to search; use `_all` or
|
|
2664
|
-
to perform the operation on all indices
|
|
2665
|
-
:param knn: kNN query to
|
|
3148
|
+
to perform the operation on all indices.
|
|
3149
|
+
:param knn: The kNN query to run.
|
|
2666
3150
|
:param docvalue_fields: The request returns doc values for field names matching
|
|
2667
|
-
these patterns in the hits.fields property of the response.
|
|
2668
|
-
(
|
|
3151
|
+
these patterns in the `hits.fields` property of the response. It accepts
|
|
3152
|
+
wildcard (`*`) patterns.
|
|
2669
3153
|
:param fields: The request returns values for field names matching these patterns
|
|
2670
|
-
in the hits.fields property of the response.
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
3154
|
+
in the `hits.fields` property of the response. It accepts wildcard (`*`)
|
|
3155
|
+
patterns.
|
|
3156
|
+
:param filter: A query to filter the documents that can match. The kNN search
|
|
3157
|
+
will return the top `k` documents that also match this filter. The value
|
|
3158
|
+
can be a single query or a list of queries. If `filter` isn't provided, all
|
|
3159
|
+
documents are allowed to match.
|
|
3160
|
+
:param routing: A comma-separated list of specific routing values.
|
|
2676
3161
|
:param source: Indicates which source fields are returned for matching documents.
|
|
2677
|
-
These fields are returned in the hits._source property of the search response.
|
|
2678
|
-
:param stored_fields:
|
|
2679
|
-
fields are specified, no stored fields are included in the response. If
|
|
2680
|
-
field is specified, the _source parameter defaults to false
|
|
2681
|
-
_source: true to return both source fields and stored fields in
|
|
2682
|
-
response.
|
|
3162
|
+
These fields are returned in the `hits._source` property of the search response.
|
|
3163
|
+
:param stored_fields: A list of stored fields to return as part of a hit. If
|
|
3164
|
+
no fields are specified, no stored fields are included in the response. If
|
|
3165
|
+
this field is specified, the `_source` parameter defaults to `false`. You
|
|
3166
|
+
can pass `_source: true` to return both source fields and stored fields in
|
|
3167
|
+
the search response.
|
|
2683
3168
|
"""
|
|
2684
3169
|
if index in SKIP_IN_PATH:
|
|
2685
3170
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -2757,12 +3242,23 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2757
3242
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
2758
3243
|
) -> ObjectApiResponse[t.Any]:
|
|
2759
3244
|
"""
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
3245
|
+
.. raw:: html
|
|
3246
|
+
|
|
3247
|
+
<p>Get multiple documents.</p>
|
|
3248
|
+
<p>Get multiple JSON documents by ID from one or more indices.
|
|
3249
|
+
If you specify an index in the request URI, you only need to specify the document IDs in the request body.
|
|
3250
|
+
To ensure fast responses, this multi get (mget) API responds with partial results if one or more shards fail.</p>
|
|
3251
|
+
<p><strong>Filter source fields</strong></p>
|
|
3252
|
+
<p>By default, the <code>_source</code> field is returned for every document (if stored).
|
|
3253
|
+
Use the <code>_source</code> and <code>_source_include</code> or <code>_source_exclude</code> attributes to filter what fields are returned for a particular document.
|
|
3254
|
+
You can include the <code>_source</code>, <code>_source_includes</code>, and <code>_source_excludes</code> query parameters in the request URI to specify the defaults to use when there are no per-document instructions.</p>
|
|
3255
|
+
<p><strong>Get stored fields</strong></p>
|
|
3256
|
+
<p>Use the <code>stored_fields</code> attribute to specify the set of stored fields you want to retrieve.
|
|
3257
|
+
Any requested fields that are not stored are ignored.
|
|
3258
|
+
You can include the <code>stored_fields</code> query parameter in the request URI to specify the defaults to use when there are no per-document instructions.</p>
|
|
3259
|
+
|
|
2764
3260
|
|
|
2765
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
3261
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-multi-get.html>`_
|
|
2766
3262
|
|
|
2767
3263
|
:param index: Name of the index to retrieve documents from when `ids` are specified,
|
|
2768
3264
|
or when a document in the `docs` array does not specify an index.
|
|
@@ -2881,15 +3377,23 @@ class AsyncElasticsearch(BaseClient):
|
|
|
2881
3377
|
typed_keys: t.Optional[bool] = None,
|
|
2882
3378
|
) -> ObjectApiResponse[t.Any]:
|
|
2883
3379
|
"""
|
|
2884
|
-
|
|
2885
|
-
and makes use of the newline delimited JSON (NDJSON) format. The structure is
|
|
2886
|
-
as follows: ``` header\\n body\\n header\\n body\\n ``` This structure is specifically
|
|
2887
|
-
optimized to reduce parsing if a specific search ends up redirected to another
|
|
2888
|
-
node. IMPORTANT: The final line of data must end with a newline character `\\n`.
|
|
2889
|
-
Each newline character may be preceded by a carriage return `\\r`. When sending
|
|
2890
|
-
requests to this endpoint the `Content-Type` header should be set to `application/x-ndjson`.
|
|
3380
|
+
.. raw:: html
|
|
2891
3381
|
|
|
2892
|
-
|
|
3382
|
+
<p>Run multiple searches.</p>
|
|
3383
|
+
<p>The format of the request is similar to the bulk API format and makes use of the newline delimited JSON (NDJSON) format.
|
|
3384
|
+
The structure is as follows:</p>
|
|
3385
|
+
<pre><code>header\\n
|
|
3386
|
+
body\\n
|
|
3387
|
+
header\\n
|
|
3388
|
+
body\\n
|
|
3389
|
+
</code></pre>
|
|
3390
|
+
<p>This structure is specifically optimized to reduce parsing if a specific search ends up redirected to another node.</p>
|
|
3391
|
+
<p>IMPORTANT: The final line of data must end with a newline character <code>\\n</code>.
|
|
3392
|
+
Each newline character may be preceded by a carriage return <code>\\r</code>.
|
|
3393
|
+
When sending requests to this endpoint the <code>Content-Type</code> header should be set to <code>application/x-ndjson</code>.</p>
|
|
3394
|
+
|
|
3395
|
+
|
|
3396
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-multi-search.html>`_
|
|
2893
3397
|
|
|
2894
3398
|
:param searches:
|
|
2895
3399
|
:param index: Comma-separated list of data streams, indices, and index aliases
|
|
@@ -3019,22 +3523,35 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3019
3523
|
typed_keys: t.Optional[bool] = None,
|
|
3020
3524
|
) -> ObjectApiResponse[t.Any]:
|
|
3021
3525
|
"""
|
|
3022
|
-
|
|
3526
|
+
.. raw:: html
|
|
3527
|
+
|
|
3528
|
+
<p>Run multiple templated searches.</p>
|
|
3529
|
+
<p>Run multiple templated searches with a single request.
|
|
3530
|
+
If you are providing a text file or text input to <code>curl</code>, use the <code>--data-binary</code> flag instead of <code>-d</code> to preserve newlines.
|
|
3531
|
+
For example:</p>
|
|
3532
|
+
<pre><code>$ cat requests
|
|
3533
|
+
{ "index": "my-index" }
|
|
3534
|
+
{ "id": "my-search-template", "params": { "query_string": "hello world", "from": 0, "size": 10 }}
|
|
3535
|
+
{ "index": "my-other-index" }
|
|
3536
|
+
{ "id": "my-other-search-template", "params": { "query_type": "match_all" }}
|
|
3023
3537
|
|
|
3024
|
-
|
|
3538
|
+
$ curl -H "Content-Type: application/x-ndjson" -XGET localhost:9200/_msearch/template --data-binary "@requests"; echo
|
|
3539
|
+
</code></pre>
|
|
3540
|
+
|
|
3541
|
+
|
|
3542
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/multi-search-template.html>`_
|
|
3025
3543
|
|
|
3026
3544
|
:param search_templates:
|
|
3027
|
-
:param index:
|
|
3028
|
-
|
|
3029
|
-
parameter or use `*`.
|
|
3545
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
3546
|
+
search. It supports wildcards (`*`). To search all data streams and indices,
|
|
3547
|
+
omit this parameter or use `*`.
|
|
3030
3548
|
:param ccs_minimize_roundtrips: If `true`, network round-trips are minimized
|
|
3031
3549
|
for cross-cluster search requests.
|
|
3032
|
-
:param max_concurrent_searches:
|
|
3033
|
-
can run.
|
|
3550
|
+
:param max_concurrent_searches: The maximum number of concurrent searches the
|
|
3551
|
+
API can run.
|
|
3034
3552
|
:param rest_total_hits_as_int: If `true`, the response returns `hits.total` as
|
|
3035
3553
|
an integer. If `false`, it returns `hits.total` as an object.
|
|
3036
|
-
:param search_type: The type of the search operation.
|
|
3037
|
-
`dfs_query_then_fetch`.
|
|
3554
|
+
:param search_type: The type of the search operation.
|
|
3038
3555
|
:param typed_keys: If `true`, the response prefixes aggregation and suggester
|
|
3039
3556
|
names with their respective types.
|
|
3040
3557
|
"""
|
|
@@ -3114,34 +3631,41 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3114
3631
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3115
3632
|
) -> ObjectApiResponse[t.Any]:
|
|
3116
3633
|
"""
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3634
|
+
.. raw:: html
|
|
3635
|
+
|
|
3636
|
+
<p>Get multiple term vectors.</p>
|
|
3637
|
+
<p>Get multiple term vectors with a single request.
|
|
3638
|
+
You can specify existing documents by index and ID or provide artificial documents in the body of the request.
|
|
3639
|
+
You can specify the index in the request body or request URI.
|
|
3640
|
+
The response contains a <code>docs</code> array with all the fetched termvectors.
|
|
3641
|
+
Each element has the structure provided by the termvectors API.</p>
|
|
3642
|
+
<p><strong>Artificial documents</strong></p>
|
|
3643
|
+
<p>You can also use <code>mtermvectors</code> to generate term vectors for artificial documents provided in the body of the request.
|
|
3644
|
+
The mapping used is determined by the specified <code>_index</code>.</p>
|
|
3645
|
+
|
|
3122
3646
|
|
|
3123
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
3647
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-multi-termvectors.html>`_
|
|
3124
3648
|
|
|
3125
|
-
:param index:
|
|
3126
|
-
:param docs:
|
|
3649
|
+
:param index: The name of the index that contains the documents.
|
|
3650
|
+
:param docs: An array of existing or artificial documents.
|
|
3127
3651
|
:param field_statistics: If `true`, the response includes the document count,
|
|
3128
3652
|
sum of document frequencies, and sum of total term frequencies.
|
|
3129
|
-
:param fields:
|
|
3130
|
-
in the statistics.
|
|
3131
|
-
is provided in the `completion_fields` or `fielddata_fields` parameters.
|
|
3132
|
-
:param ids:
|
|
3653
|
+
:param fields: A comma-separated list or wildcard expressions of fields to include
|
|
3654
|
+
in the statistics. It is used as the default list unless a specific field
|
|
3655
|
+
list is provided in the `completion_fields` or `fielddata_fields` parameters.
|
|
3656
|
+
:param ids: A simplified syntax to specify documents by their ID if they're in
|
|
3133
3657
|
the same index.
|
|
3134
3658
|
:param offsets: If `true`, the response includes term offsets.
|
|
3135
3659
|
:param payloads: If `true`, the response includes term payloads.
|
|
3136
3660
|
:param positions: If `true`, the response includes term positions.
|
|
3137
|
-
:param preference:
|
|
3138
|
-
|
|
3661
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
3662
|
+
is random by default.
|
|
3139
3663
|
:param realtime: If true, the request is real-time as opposed to near-real-time.
|
|
3140
|
-
:param routing:
|
|
3664
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
3141
3665
|
:param term_statistics: If true, the response includes term frequency and document
|
|
3142
3666
|
frequency.
|
|
3143
3667
|
:param version: If `true`, returns the document version as part of a hit.
|
|
3144
|
-
:param version_type:
|
|
3668
|
+
:param version_type: The version type.
|
|
3145
3669
|
"""
|
|
3146
3670
|
__path_parts: t.Dict[str, str]
|
|
3147
3671
|
if index not in SKIP_IN_PATH:
|
|
@@ -3224,42 +3748,68 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3224
3748
|
human: t.Optional[bool] = None,
|
|
3225
3749
|
ignore_unavailable: t.Optional[bool] = None,
|
|
3226
3750
|
index_filter: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
3751
|
+
max_concurrent_shard_requests: t.Optional[int] = None,
|
|
3227
3752
|
preference: t.Optional[str] = None,
|
|
3228
3753
|
pretty: t.Optional[bool] = None,
|
|
3229
3754
|
routing: t.Optional[str] = None,
|
|
3230
3755
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3231
3756
|
) -> ObjectApiResponse[t.Any]:
|
|
3232
3757
|
"""
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
|
|
3236
|
-
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3758
|
+
.. raw:: html
|
|
3759
|
+
|
|
3760
|
+
<p>Open a point in time.</p>
|
|
3761
|
+
<p>A search request by default runs against the most recent visible data of the target indices,
|
|
3762
|
+
which is called a point in time. An Elasticsearch PIT (point in time) is a lightweight view into the
|
|
3763
|
+
state of the data as it existed when initiated. In some cases, it’s preferred to perform multiple
|
|
3764
|
+
search requests using the same point in time. For example, if refreshes happen between
|
|
3765
|
+
<code>search_after</code> requests, then the results of those requests might not be consistent as changes happening
|
|
3766
|
+
between searches are only visible to the more recent point in time.</p>
|
|
3767
|
+
<p>A point in time must be opened explicitly before being used in search requests.</p>
|
|
3768
|
+
<p>A subsequent search request with the <code>pit</code> parameter must not specify <code>index</code>, <code>routing</code>, or <code>preference</code> values as these parameters are copied from the point in time.</p>
|
|
3769
|
+
<p>Just like regular searches, you can use <code>from</code> and <code>size</code> to page through point in time search results, up to the first 10,000 hits.
|
|
3770
|
+
If you want to retrieve more hits, use PIT with <code>search_after</code>.</p>
|
|
3771
|
+
<p>IMPORTANT: The open point in time request and each subsequent search request can return different identifiers; always use the most recently received ID for the next search request.</p>
|
|
3772
|
+
<p>When a PIT that contains shard failures is used in a search request, the missing shards are always reported in the search response as a <code>NoShardAvailableActionException</code> exception.
|
|
3773
|
+
To get rid of these exceptions, a new PIT needs to be created so that shards missing from the previous PIT can be handled, assuming they become available in the meantime.</p>
|
|
3774
|
+
<p><strong>Keeping point in time alive</strong></p>
|
|
3775
|
+
<p>The <code>keep_alive</code> parameter, which is passed to an open point in time request and search request, extends the time to live of the corresponding point in time.
|
|
3776
|
+
The value does not need to be long enough to process all data — it just needs to be long enough for the next request.</p>
|
|
3777
|
+
<p>Normally, the background merge process optimizes the index by merging together smaller segments to create new, bigger segments.
|
|
3778
|
+
Once the smaller segments are no longer needed they are deleted.
|
|
3779
|
+
However, open point-in-times prevent the old segments from being deleted since they are still in use.</p>
|
|
3780
|
+
<p>TIP: Keeping older segments alive means that more disk space and file handles are needed.
|
|
3781
|
+
Ensure that you have configured your nodes to have ample free file handles.</p>
|
|
3782
|
+
<p>Additionally, if a segment contains deleted or updated documents then the point in time must keep track of whether each document in the segment was live at the time of the initial search request.
|
|
3783
|
+
Ensure that your nodes have sufficient heap space if you have many open point-in-times on an index that is subject to ongoing deletes or updates.
|
|
3784
|
+
Note that a point-in-time doesn't prevent its associated indices from being deleted.
|
|
3785
|
+
You can check how many point-in-times (that is, search contexts) are open with the nodes stats API.</p>
|
|
3786
|
+
|
|
3787
|
+
|
|
3788
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/point-in-time-api.html>`_
|
|
3244
3789
|
|
|
3245
3790
|
:param index: A comma-separated list of index names to open point in time; use
|
|
3246
3791
|
`_all` or empty string to perform the operation on all indices
|
|
3247
|
-
:param keep_alive:
|
|
3248
|
-
:param allow_partial_search_results:
|
|
3249
|
-
|
|
3250
|
-
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3792
|
+
:param keep_alive: Extend the length of time that the point in time persists.
|
|
3793
|
+
:param allow_partial_search_results: Indicates whether the point in time tolerates
|
|
3794
|
+
unavailable shards or shard failures when initially creating the PIT. If
|
|
3795
|
+
`false`, creating a point in time request when a shard is missing or unavailable
|
|
3796
|
+
will throw an exception. If `true`, the point in time will contain all the
|
|
3797
|
+
shards that are available at the time of the request.
|
|
3798
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
3799
|
+
If the request can target data streams, this argument determines whether
|
|
3800
|
+
wildcard expressions match hidden data streams. It supports comma-separated
|
|
3801
|
+
values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`,
|
|
3802
|
+
`hidden`, `none`.
|
|
3256
3803
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
3257
3804
|
a missing or closed index.
|
|
3258
|
-
:param index_filter:
|
|
3259
|
-
|
|
3260
|
-
:param
|
|
3261
|
-
|
|
3262
|
-
:param
|
|
3805
|
+
:param index_filter: Filter indices if the provided query rewrites to `match_none`
|
|
3806
|
+
on every shard.
|
|
3807
|
+
:param max_concurrent_shard_requests: Maximum number of concurrent shard requests
|
|
3808
|
+
that each sub-search request executes per node.
|
|
3809
|
+
:param preference: The node or shard the operation should be performed on. By
|
|
3810
|
+
default, it is random.
|
|
3811
|
+
:param routing: A custom value that is used to route operations to a specific
|
|
3812
|
+
shard.
|
|
3263
3813
|
"""
|
|
3264
3814
|
if index in SKIP_IN_PATH:
|
|
3265
3815
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -3283,6 +3833,8 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3283
3833
|
__query["human"] = human
|
|
3284
3834
|
if ignore_unavailable is not None:
|
|
3285
3835
|
__query["ignore_unavailable"] = ignore_unavailable
|
|
3836
|
+
if max_concurrent_shard_requests is not None:
|
|
3837
|
+
__query["max_concurrent_shard_requests"] = max_concurrent_shard_requests
|
|
3286
3838
|
if preference is not None:
|
|
3287
3839
|
__query["preference"] = preference
|
|
3288
3840
|
if pretty is not None:
|
|
@@ -3325,23 +3877,27 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3325
3877
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3326
3878
|
) -> ObjectApiResponse[t.Any]:
|
|
3327
3879
|
"""
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3344
|
-
|
|
3880
|
+
.. raw:: html
|
|
3881
|
+
|
|
3882
|
+
<p>Create or update a script or search template.
|
|
3883
|
+
Creates or updates a stored script or search template.</p>
|
|
3884
|
+
|
|
3885
|
+
|
|
3886
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/create-stored-script-api.html>`_
|
|
3887
|
+
|
|
3888
|
+
:param id: The identifier for the stored script or search template. It must be
|
|
3889
|
+
unique within the cluster.
|
|
3890
|
+
:param script: The script or search template, its parameters, and its language.
|
|
3891
|
+
:param context: The context in which the script or search template should run.
|
|
3892
|
+
To prevent errors, the API immediately compiles the script or template in
|
|
3893
|
+
this context.
|
|
3894
|
+
:param master_timeout: The period to wait for a connection to the master node.
|
|
3895
|
+
If no response is received before the timeout expires, the request fails
|
|
3896
|
+
and returns an error. It can also be set to `-1` to indicate that the request
|
|
3897
|
+
should never timeout.
|
|
3898
|
+
:param timeout: The period to wait for a response. If no response is received
|
|
3899
|
+
before the timeout expires, the request fails and returns an error. It can
|
|
3900
|
+
also be set to `-1` to indicate that the request should never timeout.
|
|
3345
3901
|
"""
|
|
3346
3902
|
if id in SKIP_IN_PATH:
|
|
3347
3903
|
raise ValueError("Empty value passed for parameter 'id'")
|
|
@@ -3411,14 +3967,17 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3411
3967
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3412
3968
|
) -> ObjectApiResponse[t.Any]:
|
|
3413
3969
|
"""
|
|
3414
|
-
|
|
3415
|
-
|
|
3970
|
+
.. raw:: html
|
|
3971
|
+
|
|
3972
|
+
<p>Evaluate ranked search results.</p>
|
|
3973
|
+
<p>Evaluate the quality of ranked search results over a set of typical search queries.</p>
|
|
3416
3974
|
|
|
3417
|
-
|
|
3975
|
+
|
|
3976
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-rank-eval.html>`_
|
|
3418
3977
|
|
|
3419
3978
|
:param requests: A set of typical search requests, together with their provided
|
|
3420
3979
|
ratings.
|
|
3421
|
-
:param index:
|
|
3980
|
+
:param index: A comma-separated list of data streams, indices, and index aliases
|
|
3422
3981
|
used to limit the request. Wildcard (`*`) expressions are supported. To target
|
|
3423
3982
|
all data streams and indices in a cluster, omit this parameter or use `_all`
|
|
3424
3983
|
or `*`.
|
|
@@ -3506,33 +4065,187 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3506
4065
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3507
4066
|
) -> ObjectApiResponse[t.Any]:
|
|
3508
4067
|
"""
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
|
|
3512
|
-
|
|
3513
|
-
|
|
4068
|
+
.. raw:: html
|
|
4069
|
+
|
|
4070
|
+
<p>Reindex documents.</p>
|
|
4071
|
+
<p>Copy documents from a source to a destination.
|
|
4072
|
+
You can copy all documents to the destination index or reindex a subset of the documents.
|
|
4073
|
+
The source can be any existing index, alias, or data stream.
|
|
4074
|
+
The destination must differ from the source.
|
|
4075
|
+
For example, you cannot reindex a data stream into itself.</p>
|
|
4076
|
+
<p>IMPORTANT: Reindex requires <code>_source</code> to be enabled for all documents in the source.
|
|
4077
|
+
The destination should be configured as wanted before calling the reindex API.
|
|
4078
|
+
Reindex does not copy the settings from the source or its associated template.
|
|
4079
|
+
Mappings, shard counts, and replicas, for example, must be configured ahead of time.</p>
|
|
4080
|
+
<p>If the Elasticsearch security features are enabled, you must have the following security privileges:</p>
|
|
4081
|
+
<ul>
|
|
4082
|
+
<li>The <code>read</code> index privilege for the source data stream, index, or alias.</li>
|
|
4083
|
+
<li>The <code>write</code> index privilege for the destination data stream, index, or index alias.</li>
|
|
4084
|
+
<li>To automatically create a data stream or index with a reindex API request, you must have the <code>auto_configure</code>, <code>create_index</code>, or <code>manage</code> index privilege for the destination data stream, index, or alias.</li>
|
|
4085
|
+
<li>If reindexing from a remote cluster, the <code>source.remote.user</code> must have the <code>monitor</code> cluster privilege and the <code>read</code> index privilege for the source data stream, index, or alias.</li>
|
|
4086
|
+
</ul>
|
|
4087
|
+
<p>If reindexing from a remote cluster, you must explicitly allow the remote host in the <code>reindex.remote.whitelist</code> setting.
|
|
4088
|
+
Automatic data stream creation requires a matching index template with data stream enabled.</p>
|
|
4089
|
+
<p>The <code>dest</code> element can be configured like the index API to control optimistic concurrency control.
|
|
4090
|
+
Omitting <code>version_type</code> or setting it to <code>internal</code> causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.</p>
|
|
4091
|
+
<p>Setting <code>version_type</code> to <code>external</code> causes Elasticsearch to preserve the <code>version</code> from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source.</p>
|
|
4092
|
+
<p>Setting <code>op_type</code> to <code>create</code> causes the reindex API to create only missing documents in the destination.
|
|
4093
|
+
All existing documents will cause a version conflict.</p>
|
|
4094
|
+
<p>IMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an <code>op_type</code> of <code>create</code>.
|
|
4095
|
+
A reindex can only add new documents to a destination data stream.
|
|
4096
|
+
It cannot update existing documents in a destination data stream.</p>
|
|
4097
|
+
<p>By default, version conflicts abort the reindex process.
|
|
4098
|
+
To continue reindexing if there are conflicts, set the <code>conflicts</code> request body property to <code>proceed</code>.
|
|
4099
|
+
In this case, the response includes a count of the version conflicts that were encountered.
|
|
4100
|
+
Note that the handling of other error types is unaffected by the <code>conflicts</code> property.
|
|
4101
|
+
Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than <code>max_docs</code> until it has successfully indexed <code>max_docs</code> documents into the target or it has gone through every document in the source query.</p>
|
|
4102
|
+
<p>NOTE: The reindex API makes no effort to handle ID collisions.
|
|
4103
|
+
The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior.
|
|
4104
|
+
Instead, make sure that IDs are unique by using a script.</p>
|
|
4105
|
+
<p><strong>Running reindex asynchronously</strong></p>
|
|
4106
|
+
<p>If the request contains <code>wait_for_completion=false</code>, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.
|
|
4107
|
+
Elasticsearch creates a record of this task as a document at <code>_tasks/&lt;task_id&gt;</code>.</p>
|
|
4108
|
+
<p><strong>Reindex from multiple sources</strong></p>
|
|
4109
|
+
<p>If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.
|
|
4110
|
+
That way you can resume the process if there are any errors by removing the partially completed source and starting over.
|
|
4111
|
+
It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.</p>
|
|
4112
|
+
<p>For example, you can use a bash script like this:</p>
|
|
4113
|
+
<pre><code>for index in i1 i2 i3 i4 i5; do
|
|
4114
|
+
curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{
|
|
4115
|
+
"source": {
|
|
4116
|
+
"index": "'$index'"
|
|
4117
|
+
},
|
|
4118
|
+
"dest": {
|
|
4119
|
+
"index": "'$index'-reindexed"
|
|
4120
|
+
}
|
|
4121
|
+
}'
|
|
4122
|
+
done
|
|
4123
|
+
</code></pre>
|
|
4124
|
+
<p><strong>Throttling</strong></p>
|
|
4125
|
+
<p>Set <code>requests_per_second</code> to any positive decimal number (<code>1.4</code>, <code>6</code>, <code>1000</code>, for example) to throttle the rate at which reindex issues batches of index operations.
|
|
4126
|
+
Requests are throttled by padding each batch with a wait time.
|
|
4127
|
+
To turn off throttling, set <code>requests_per_second</code> to <code>-1</code>.</p>
|
|
4128
|
+
<p>The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.
|
|
4129
|
+
The padding time is the difference between the batch size divided by the <code>requests_per_second</code> and the time spent writing.
|
|
4130
|
+
By default the batch size is <code>1000</code>, so if <code>requests_per_second</code> is set to <code>500</code>:</p>
|
|
4131
|
+
<pre><code>target_time = 1000 / 500 per second = 2 seconds
|
|
4132
|
+
wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
|
|
4133
|
+
</code></pre>
|
|
4134
|
+
<p>Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.
|
|
4135
|
+
This is "bursty" instead of "smooth".</p>
|
|
4136
|
+
<p><strong>Slicing</strong></p>
|
|
4137
|
+
<p>Reindex supports sliced scroll to parallelize the reindexing process.
|
|
4138
|
+
This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.</p>
|
|
4139
|
+
<p>NOTE: Reindexing from remote clusters does not support manual or automatic slicing.</p>
|
|
4140
|
+
<p>You can slice a reindex request manually by providing a slice ID and total number of slices to each request.
|
|
4141
|
+
You can also let reindex automatically parallelize by using sliced scroll to slice on <code>_id</code>.
|
|
4142
|
+
The <code>slices</code> parameter specifies the number of slices to use.</p>
|
|
4143
|
+
<p>Adding <code>slices</code> to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:</p>
|
|
4144
|
+
<ul>
|
|
4145
|
+
<li>You can see these requests in the tasks API. These sub-requests are "child" tasks of the task for the request with slices.</li>
|
|
4146
|
+
<li>Fetching the status of the task for the request with <code>slices</code> only contains the status of completed slices.</li>
|
|
4147
|
+
<li>These sub-requests are individually addressable for things like cancellation and rethrottling.</li>
|
|
4148
|
+
<li>Rethrottling the request with <code>slices</code> will rethrottle the unfinished sub-request proportionally.</li>
|
|
4149
|
+
<li>Canceling the request with <code>slices</code> will cancel each sub-request.</li>
|
|
4150
|
+
<li>Due to the nature of <code>slices</code>, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.</li>
|
|
4151
|
+
<li>Parameters like <code>requests_per_second</code> and <code>max_docs</code> on a request with <code>slices</code> are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using <code>max_docs</code> with <code>slices</code> might not result in exactly <code>max_docs</code> documents being reindexed.</li>
|
|
4152
|
+
<li>Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.</li>
|
|
4153
|
+
</ul>
|
|
4154
|
+
<p>If slicing automatically, setting <code>slices</code> to <code>auto</code> will choose a reasonable number for most indices.
|
|
4155
|
+
If slicing manually or otherwise tuning automatic slicing, use the following guidelines.</p>
|
|
4156
|
+
<p>Query performance is most efficient when the number of slices is equal to the number of shards in the index.
|
|
4157
|
+
If that number is large (for example, <code>500</code>), choose a lower number as too many slices will hurt performance.
|
|
4158
|
+
Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.</p>
|
|
4159
|
+
<p>Indexing performance scales linearly across available resources with the number of slices.</p>
|
|
4160
|
+
<p>Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.</p>
|
|
4161
|
+
<p><strong>Modify documents during reindexing</strong></p>
|
|
4162
|
+
<p>Like <code>_update_by_query</code>, reindex operations support a script that modifies the document.
|
|
4163
|
+
Unlike <code>_update_by_query</code>, the script is allowed to modify the document's metadata.</p>
|
|
4164
|
+
<p>Just as in <code>_update_by_query</code>, you can set <code>ctx.op</code> to change the operation that is run on the destination.
|
|
4165
|
+
For example, set <code>ctx.op</code> to <code>noop</code> if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the <code>noop</code> counter in the response body.
|
|
4166
|
+
Set <code>ctx.op</code> to <code>delete</code> if your script decides that the document must be deleted from the destination.
|
|
4167
|
+
The deletion will be reported in the <code>deleted</code> counter in the response body.
|
|
4168
|
+
Setting <code>ctx.op</code> to anything else will return an error, as will setting any other field in <code>ctx</code>.</p>
|
|
4169
|
+
<p>Think of the possibilities! Just be careful; you are able to change:</p>
|
|
4170
|
+
<ul>
|
|
4171
|
+
<li><code>_id</code></li>
|
|
4172
|
+
<li><code>_index</code></li>
|
|
4173
|
+
<li><code>_version</code></li>
|
|
4174
|
+
<li><code>_routing</code></li>
|
|
4175
|
+
</ul>
|
|
4176
|
+
<p>Setting <code>_version</code> to <code>null</code> or clearing it from the <code>ctx</code> map is just like not sending the version in an indexing request.
|
|
4177
|
+
It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.</p>
|
|
4178
|
+
<p><strong>Reindex from remote</strong></p>
|
|
4179
|
+
<p>Reindex supports reindexing from a remote Elasticsearch cluster.
|
|
4180
|
+
The <code>host</code> parameter must contain a scheme, host, port, and optional path.
|
|
4181
|
+
The <code>username</code> and <code>password</code> parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication.
|
|
4182
|
+
Be sure to use HTTPS when using basic authentication or the password will be sent in plain text.
|
|
4183
|
+
There are a range of settings available to configure the behavior of the HTTPS connection.</p>
|
|
4184
|
+
<p>When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.
|
|
4185
|
+
Remote hosts must be explicitly allowed with the <code>reindex.remote.whitelist</code> setting.
|
|
4186
|
+
It can be set to a comma delimited list of allowed remote host and port combinations.
|
|
4187
|
+
Scheme is ignored; only the host and port are used.
|
|
4188
|
+
For example:</p>
|
|
4189
|
+
<pre><code>reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]
|
|
4190
|
+
</code></pre>
|
|
4191
|
+
<p>The list of allowed hosts must be configured on any nodes that will coordinate the reindex.
|
|
4192
|
+
This feature should work with remote clusters of any version of Elasticsearch.
|
|
4193
|
+
This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.</p>
|
|
4194
|
+
<p>WARNING: Elasticsearch does not support forward compatibility across major versions.
|
|
4195
|
+
For example, you cannot reindex from a 7.x cluster into a 6.x cluster.</p>
|
|
4196
|
+
<p>To enable queries sent to older versions of Elasticsearch, the <code>query</code> parameter is sent directly to the remote host without validation or modification.</p>
|
|
4197
|
+
<p>NOTE: Reindexing from remote clusters does not support manual or automatic slicing.</p>
|
|
4198
|
+
<p>Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb.
|
|
4199
|
+
If the remote index includes very large documents you'll need to use a smaller batch size.
|
|
4200
|
+
It is also possible to set the socket read timeout on the remote connection with the <code>socket_timeout</code> field and the connection timeout with the <code>connect_timeout</code> field.
|
|
4201
|
+
Both default to 30 seconds.</p>
|
|
4202
|
+
<p><strong>Configuring SSL parameters</strong></p>
|
|
4203
|
+
<p>Reindex from remote supports configurable SSL settings.
|
|
4204
|
+
These must be specified in the <code>elasticsearch.yml</code> file, with the exception of the secure settings, which you add in the Elasticsearch keystore.
|
|
4205
|
+
It is not possible to configure SSL in the body of the reindex request.</p>
|
|
4206
|
+
|
|
4207
|
+
|
|
4208
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-reindex.html>`_
|
|
3514
4209
|
|
|
3515
4210
|
:param dest: The destination you are copying to.
|
|
3516
4211
|
:param source: The source you are copying from.
|
|
3517
|
-
:param conflicts:
|
|
3518
|
-
|
|
4212
|
+
:param conflicts: Indicates whether to continue reindexing even when there are
|
|
4213
|
+
conflicts.
|
|
4214
|
+
:param max_docs: The maximum number of documents to reindex. By default, all
|
|
4215
|
+
documents are reindexed. If it is a value less than or equal to `scroll_size`,
|
|
4216
|
+
a scroll will not be used to retrieve the results for the operation. If `conflicts`
|
|
4217
|
+
is set to `proceed`, the reindex operation could attempt to reindex more
|
|
4218
|
+
documents from the source than `max_docs` until it has successfully indexed
|
|
4219
|
+
`max_docs` documents into the target or it has gone through every document
|
|
4220
|
+
in the source query.
|
|
3519
4221
|
:param refresh: If `true`, the request refreshes affected shards to make this
|
|
3520
4222
|
operation visible to search.
|
|
3521
4223
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
3522
|
-
second.
|
|
4224
|
+
second. By default, there is no throttle.
|
|
3523
4225
|
:param require_alias: If `true`, the destination must be an index alias.
|
|
3524
4226
|
:param script: The script to run to update the document source or metadata when
|
|
3525
4227
|
reindexing.
|
|
3526
|
-
:param scroll:
|
|
3527
|
-
for scrolled search.
|
|
4228
|
+
:param scroll: The period of time that a consistent view of the index should
|
|
4229
|
+
be maintained for scrolled search.
|
|
3528
4230
|
:param size:
|
|
3529
|
-
:param slices: The number of slices this task should be divided into.
|
|
3530
|
-
to
|
|
3531
|
-
|
|
3532
|
-
|
|
4231
|
+
:param slices: The number of slices this task should be divided into. It defaults
|
|
4232
|
+
to one slice, which means the task isn't sliced into subtasks. Reindex supports
|
|
4233
|
+
sliced scroll to parallelize the reindexing process. This parallelization
|
|
4234
|
+
can improve efficiency and provide a convenient way to break the request
|
|
4235
|
+
down into smaller parts. NOTE: Reindexing from remote clusters does not support
|
|
4236
|
+
manual or automatic slicing. If set to `auto`, Elasticsearch chooses the
|
|
4237
|
+
number of slices to use. This setting will use one slice per shard, up to
|
|
4238
|
+
a certain limit. If there are multiple sources, it will choose the number
|
|
4239
|
+
of slices based on the index or backing index with the smallest number of
|
|
4240
|
+
shards.
|
|
4241
|
+
:param timeout: The period each indexing waits for automatic index creation,
|
|
4242
|
+
dynamic mapping updates, and waiting for active shards. By default, Elasticsearch
|
|
4243
|
+
waits for at least one minute before failing. The actual wait time could
|
|
4244
|
+
be longer, particularly when multiple waits occur.
|
|
3533
4245
|
:param wait_for_active_shards: The number of shard copies that must be active
|
|
3534
|
-
before proceeding with the operation. Set to `all` or any positive integer
|
|
3535
|
-
up to the total number of shards in the index (`number_of_replicas+1`).
|
|
4246
|
+
before proceeding with the operation. Set it to `all` or any positive integer
|
|
4247
|
+
up to the total number of shards in the index (`number_of_replicas+1`). The
|
|
4248
|
+
default value is one, which means it waits for each primary shard to be active.
|
|
3536
4249
|
:param wait_for_completion: If `true`, the request blocks until the operation
|
|
3537
4250
|
is complete.
|
|
3538
4251
|
"""
|
|
@@ -3604,14 +4317,24 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3604
4317
|
requests_per_second: t.Optional[float] = None,
|
|
3605
4318
|
) -> ObjectApiResponse[t.Any]:
|
|
3606
4319
|
"""
|
|
3607
|
-
|
|
3608
|
-
particular reindex operation.
|
|
4320
|
+
.. raw:: html
|
|
3609
4321
|
|
|
3610
|
-
|
|
4322
|
+
<p>Throttle a reindex operation.</p>
|
|
4323
|
+
<p>Change the number of requests per second for a particular reindex operation.
|
|
4324
|
+
For example:</p>
|
|
4325
|
+
<pre><code>POST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1
|
|
4326
|
+
</code></pre>
|
|
4327
|
+
<p>Rethrottling that speeds up the query takes effect immediately.
|
|
4328
|
+
Rethrottling that slows down the query will take effect after completing the current batch.
|
|
4329
|
+
This behavior prevents scroll timeouts.</p>
|
|
3611
4330
|
|
|
3612
|
-
|
|
4331
|
+
|
|
4332
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-reindex.html>`_
|
|
4333
|
+
|
|
4334
|
+
:param task_id: The task identifier, which can be found by using the tasks API.
|
|
3613
4335
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
3614
|
-
second.
|
|
4336
|
+
second. It can be either `-1` to turn off throttling or any decimal number
|
|
4337
|
+
like `1.7` or `12` to throttle to that level.
|
|
3615
4338
|
"""
|
|
3616
4339
|
if task_id in SKIP_IN_PATH:
|
|
3617
4340
|
raise ValueError("Empty value passed for parameter 'task_id'")
|
|
@@ -3656,17 +4379,21 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3656
4379
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3657
4380
|
) -> ObjectApiResponse[t.Any]:
|
|
3658
4381
|
"""
|
|
3659
|
-
|
|
4382
|
+
.. raw:: html
|
|
3660
4383
|
|
|
3661
|
-
|
|
4384
|
+
<p>Render a search template.</p>
|
|
4385
|
+
<p>Render a search template as a search request body.</p>
|
|
3662
4386
|
|
|
3663
|
-
|
|
4387
|
+
|
|
4388
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/render-search-template-api.html>`_
|
|
4389
|
+
|
|
4390
|
+
:param id: The ID of the search template to render. If no `source` is specified,
|
|
3664
4391
|
this or the `id` request body parameter is required.
|
|
3665
4392
|
:param file:
|
|
3666
4393
|
:param params: Key-value pairs used to replace Mustache variables in the template.
|
|
3667
4394
|
The key is the variable name. The value is the variable value.
|
|
3668
|
-
:param source: An inline search template.
|
|
3669
|
-
search API's request body. These parameters also support Mustache variables.
|
|
4395
|
+
:param source: An inline search template. It supports the same parameters as
|
|
4396
|
+
the search API's request body. These parameters also support Mustache variables.
|
|
3670
4397
|
If no `id` or `<templated-id>` is specified, this parameter is required.
|
|
3671
4398
|
"""
|
|
3672
4399
|
__path_parts: t.Dict[str, str]
|
|
@@ -3715,7 +4442,24 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3715
4442
|
async def scripts_painless_execute(
|
|
3716
4443
|
self,
|
|
3717
4444
|
*,
|
|
3718
|
-
context: t.Optional[
|
|
4445
|
+
context: t.Optional[
|
|
4446
|
+
t.Union[
|
|
4447
|
+
str,
|
|
4448
|
+
t.Literal[
|
|
4449
|
+
"boolean_field",
|
|
4450
|
+
"composite_field",
|
|
4451
|
+
"date_field",
|
|
4452
|
+
"double_field",
|
|
4453
|
+
"filter",
|
|
4454
|
+
"geo_point_field",
|
|
4455
|
+
"ip_field",
|
|
4456
|
+
"keyword_field",
|
|
4457
|
+
"long_field",
|
|
4458
|
+
"painless_test",
|
|
4459
|
+
"score",
|
|
4460
|
+
],
|
|
4461
|
+
]
|
|
4462
|
+
] = None,
|
|
3719
4463
|
context_setup: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
3720
4464
|
error_trace: t.Optional[bool] = None,
|
|
3721
4465
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
@@ -3725,13 +4469,24 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3725
4469
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3726
4470
|
) -> ObjectApiResponse[t.Any]:
|
|
3727
4471
|
"""
|
|
3728
|
-
|
|
4472
|
+
.. raw:: html
|
|
4473
|
+
|
|
4474
|
+
<p>Run a script.</p>
|
|
4475
|
+
<p>Runs a script and returns a result.
|
|
4476
|
+
Use this API to build and test scripts, such as when defining a script for a runtime field.
|
|
4477
|
+
This API requires very few dependencies and is especially useful if you don't have permissions to write documents on a cluster.</p>
|
|
4478
|
+
<p>The API uses several <em>contexts</em>, which control how scripts are run, what variables are available at runtime, and what the return type is.</p>
|
|
4479
|
+
<p>Each context requires a script, but additional parameters depend on the context you're using for that script.</p>
|
|
4480
|
+
|
|
3729
4481
|
|
|
3730
|
-
`<https://www.elastic.co/guide/en/elasticsearch/painless/8.
|
|
4482
|
+
`<https://www.elastic.co/guide/en/elasticsearch/painless/8.18/painless-execute-api.html>`_
|
|
3731
4483
|
|
|
3732
|
-
:param context: The context that the script should run in.
|
|
3733
|
-
|
|
3734
|
-
:param
|
|
4484
|
+
:param context: The context that the script should run in. NOTE: Result ordering
|
|
4485
|
+
in the field contexts is not guaranteed.
|
|
4486
|
+
:param context_setup: Additional parameters for the `context`. NOTE: This parameter
|
|
4487
|
+
is required for all contexts except `painless_test`, which is the default
|
|
4488
|
+
if no value is provided for `context`.
|
|
4489
|
+
:param script: The Painless script to run.
|
|
3735
4490
|
"""
|
|
3736
4491
|
__path_parts: t.Dict[str, str] = {}
|
|
3737
4492
|
__path = "/_scripts/painless/_execute"
|
|
@@ -3783,30 +4538,27 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3783
4538
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3784
4539
|
) -> ObjectApiResponse[t.Any]:
|
|
3785
4540
|
"""
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
3792
|
-
|
|
3793
|
-
|
|
3794
|
-
|
|
3795
|
-
|
|
3796
|
-
|
|
3797
|
-
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.17/search-request-body.html#request-body-search-scroll>`_
|
|
3804
|
-
|
|
3805
|
-
:param scroll_id: Scroll ID of the search.
|
|
4541
|
+
.. raw:: html
|
|
4542
|
+
|
|
4543
|
+
<p>Run a scrolling search.</p>
|
|
4544
|
+
<p>IMPORTANT: The scroll API is no longer recommended for deep pagination. If you need to preserve the index state while paging through more than 10,000 hits, use the <code>search_after</code> parameter with a point in time (PIT).</p>
|
|
4545
|
+
<p>The scroll API gets large sets of results from a single scrolling search request.
|
|
4546
|
+
To get the necessary scroll ID, submit a search API request that includes an argument for the <code>scroll</code> query parameter.
|
|
4547
|
+
The <code>scroll</code> parameter indicates how long Elasticsearch should retain the search context for the request.
|
|
4548
|
+
The search response returns a scroll ID in the <code>_scroll_id</code> response body parameter.
|
|
4549
|
+
You can then use the scroll ID with the scroll API to retrieve the next batch of results for the request.
|
|
4550
|
+
If the Elasticsearch security features are enabled, the access to the results of a specific scroll ID is restricted to the user or API key that submitted the search.</p>
|
|
4551
|
+
<p>You can also use the scroll API to specify a new scroll parameter that extends or shortens the retention period for the search context.</p>
|
|
4552
|
+
<p>IMPORTANT: Results from a scrolling search reflect the state of the index at the time of the initial search request. Subsequent indexing or document changes only affect later search and scroll requests.</p>
|
|
4553
|
+
|
|
4554
|
+
|
|
4555
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/scroll-api.html>`_
|
|
4556
|
+
|
|
4557
|
+
:param scroll_id: The scroll ID of the search.
|
|
3806
4558
|
:param rest_total_hits_as_int: If true, the API response’s hits.total property
|
|
3807
4559
|
is returned as an integer. If false, the API response’s hits.total property
|
|
3808
4560
|
is returned as an object.
|
|
3809
|
-
:param scroll:
|
|
4561
|
+
:param scroll: The period to retain the search context for scrolling.
|
|
3810
4562
|
"""
|
|
3811
4563
|
if scroll_id is None and body is None:
|
|
3812
4564
|
raise ValueError("Empty value passed for parameter 'scroll_id'")
|
|
@@ -3988,15 +4740,29 @@ class AsyncElasticsearch(BaseClient):
|
|
|
3988
4740
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
3989
4741
|
) -> ObjectApiResponse[t.Any]:
|
|
3990
4742
|
"""
|
|
3991
|
-
|
|
3992
|
-
|
|
3993
|
-
|
|
3994
|
-
|
|
3995
|
-
|
|
4743
|
+
.. raw:: html
|
|
4744
|
+
|
|
4745
|
+
<p>Run a search.</p>
|
|
4746
|
+
<p>Get search hits that match the query defined in the request.
|
|
4747
|
+
You can provide search queries using the <code>q</code> query string parameter or the request body.
|
|
4748
|
+
If both are specified, only the query parameter is used.</p>
|
|
4749
|
+
<p>If the Elasticsearch security features are enabled, you must have the read index privilege for the target data stream, index, or alias. For cross-cluster search, refer to the documentation about configuring CCS privileges.
|
|
4750
|
+
To search a point in time (PIT) for an alias, you must have the <code>read</code> index privilege for the alias's data streams or indices.</p>
|
|
4751
|
+
<p><strong>Search slicing</strong></p>
|
|
4752
|
+
<p>When paging through a large number of documents, it can be helpful to split the search into multiple slices to consume them independently with the <code>slice</code> and <code>pit</code> properties.
|
|
4753
|
+
By default the splitting is done first on the shards, then locally on each shard.
|
|
4754
|
+
The local splitting partitions the shard into contiguous ranges based on Lucene document IDs.</p>
|
|
4755
|
+
<p>For instance if the number of shards is equal to 2 and you request 4 slices, the slices 0 and 2 are assigned to the first shard and the slices 1 and 3 are assigned to the second shard.</p>
|
|
4756
|
+
<p>IMPORTANT: The same point-in-time ID should be used for all slices.
|
|
4757
|
+
If different PIT IDs are used, slices can overlap and miss documents.
|
|
4758
|
+
This situation can occur because the splitting criterion is based on Lucene document IDs, which are not stable across changes to the index.</p>
|
|
4759
|
+
|
|
4760
|
+
|
|
4761
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-search.html>`_
|
|
3996
4762
|
|
|
3997
|
-
:param index:
|
|
3998
|
-
|
|
3999
|
-
parameter or use `*` or `_all`.
|
|
4763
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
4764
|
+
search. It supports wildcards (`*`). To search all data streams and indices,
|
|
4765
|
+
omit this parameter or use `*` or `_all`.
|
|
4000
4766
|
:param aggregations: Defines the aggregations that are run as part of the search
|
|
4001
4767
|
request.
|
|
4002
4768
|
:param aggs: Defines the aggregations that are run as part of the search request.
|
|
@@ -4005,45 +4771,46 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4005
4771
|
This behavior applies even if the request targets other open indices. For
|
|
4006
4772
|
example, a request targeting `foo*,bar*` returns an error if an index starts
|
|
4007
4773
|
with `foo` but no index starts with `bar`.
|
|
4008
|
-
:param allow_partial_search_results: If true
|
|
4009
|
-
|
|
4010
|
-
with no partial results.
|
|
4011
|
-
|
|
4012
|
-
|
|
4013
|
-
|
|
4014
|
-
|
|
4774
|
+
:param allow_partial_search_results: If `true` and there are shard request timeouts
|
|
4775
|
+
or shard failures, the request returns partial results. If `false`, it returns
|
|
4776
|
+
an error with no partial results. To override the default behavior, you can
|
|
4777
|
+
set the `search.default_allow_partial_results` cluster setting to `false`.
|
|
4778
|
+
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
|
|
4779
|
+
This parameter can be used only when the `q` query string parameter is specified.
|
|
4780
|
+
:param analyzer: The analyzer to use for the query string. This parameter can
|
|
4781
|
+
be used only when the `q` query string parameter is specified.
|
|
4015
4782
|
:param batched_reduce_size: The number of shard results that should be reduced
|
|
4016
|
-
at once on the coordinating node.
|
|
4017
|
-
|
|
4018
|
-
|
|
4019
|
-
:param ccs_minimize_roundtrips: If true
|
|
4020
|
-
node and the remote clusters are minimized when
|
|
4783
|
+
at once on the coordinating node. If the potential number of shards in the
|
|
4784
|
+
request can be large, this value should be used as a protection mechanism
|
|
4785
|
+
to reduce the memory overhead per search request.
|
|
4786
|
+
:param ccs_minimize_roundtrips: If `true`, network round-trips between the coordinating
|
|
4787
|
+
node and the remote clusters are minimized when running cross-cluster search
|
|
4021
4788
|
(CCS) requests.
|
|
4022
4789
|
:param collapse: Collapses search results by the values of the specified field.
|
|
4023
|
-
:param default_operator: The default operator for query string query: AND
|
|
4024
|
-
OR
|
|
4025
|
-
specified.
|
|
4026
|
-
:param df: Field to use as default where no field prefix is given in the query
|
|
4027
|
-
string. This parameter can only be used when the q query string parameter
|
|
4790
|
+
:param default_operator: The default operator for the query string query: `AND`
|
|
4791
|
+
or `OR`. This parameter can be used only when the `q` query string parameter
|
|
4028
4792
|
is specified.
|
|
4029
|
-
:param
|
|
4030
|
-
|
|
4031
|
-
|
|
4032
|
-
:param
|
|
4033
|
-
|
|
4034
|
-
|
|
4035
|
-
|
|
4036
|
-
|
|
4037
|
-
|
|
4793
|
+
:param df: The field to use as a default when no field prefix is given in the
|
|
4794
|
+
query string. This parameter can be used only when the `q` query string parameter
|
|
4795
|
+
is specified.
|
|
4796
|
+
:param docvalue_fields: An array of wildcard (`*`) field patterns. The request
|
|
4797
|
+
returns doc values for field names matching these patterns in the `hits.fields`
|
|
4798
|
+
property of the response.
|
|
4799
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
4800
|
+
If the request can target data streams, this argument determines whether
|
|
4801
|
+
wildcard expressions match hidden data streams. It supports comma-separated
|
|
4802
|
+
values such as `open,hidden`.
|
|
4803
|
+
:param explain: If `true`, the request returns detailed information about score
|
|
4804
|
+
computation as part of a hit.
|
|
4038
4805
|
:param ext: Configuration of search extensions defined by Elasticsearch plugins.
|
|
4039
|
-
:param fields:
|
|
4040
|
-
field names matching these patterns in the `hits.fields` property
|
|
4041
|
-
response.
|
|
4806
|
+
:param fields: An array of wildcard (`*`) field patterns. The request returns
|
|
4807
|
+
values for field names matching these patterns in the `hits.fields` property
|
|
4808
|
+
of the response.
|
|
4042
4809
|
:param force_synthetic_source: Should this request force synthetic _source? Use
|
|
4043
4810
|
this to test if the mapping supports synthetic _source and to get a sense
|
|
4044
4811
|
of the worst case performance. Fetches with this enabled will be slower than
|
|
4045
4812
|
enabling synthetic source natively in the index.
|
|
4046
|
-
:param from_:
|
|
4813
|
+
:param from_: The starting document offset, which must be non-negative. By default,
|
|
4047
4814
|
you cannot page through more than 10,000 hits using the `from` and `size`
|
|
4048
4815
|
parameters. To page through more hits, use the `search_after` parameter.
|
|
4049
4816
|
:param highlight: Specifies the highlighter to use for retrieving highlighted
|
|
@@ -4052,95 +4819,101 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4052
4819
|
be ignored when frozen.
|
|
4053
4820
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
4054
4821
|
a missing or closed index.
|
|
4055
|
-
:param include_named_queries_score:
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4822
|
+
:param include_named_queries_score: If `true`, the response includes the score
|
|
4823
|
+
contribution from any named queries. This functionality reruns each named
|
|
4824
|
+
query on every hit in a search response. Typically, this adds a small overhead
|
|
4825
|
+
to a request. However, using computationally expensive named queries on a
|
|
4826
|
+
large number of hits may add significant overhead.
|
|
4827
|
+
:param indices_boost: Boost the `_score` of documents from specified indices.
|
|
4828
|
+
The boost value is the factor by which scores are multiplied. A boost value
|
|
4829
|
+
greater than `1.0` increases the score. A boost value between `0` and `1.0`
|
|
4830
|
+
decreases the score.
|
|
4831
|
+
:param knn: The approximate kNN search to run.
|
|
4064
4832
|
:param lenient: If `true`, format-based query failures (such as providing text
|
|
4065
4833
|
to a numeric field) in the query string will be ignored. This parameter can
|
|
4066
|
-
|
|
4067
|
-
:param max_concurrent_shard_requests:
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4834
|
+
be used only when the `q` query string parameter is specified.
|
|
4835
|
+
:param max_concurrent_shard_requests: The number of concurrent shard requests
|
|
4836
|
+
per node that the search runs concurrently. This value should be used to
|
|
4837
|
+
limit the impact of the search on the cluster in order to limit the number
|
|
4838
|
+
of concurrent shard requests.
|
|
4071
4839
|
:param min_compatible_shard_node: The minimum version of the node that can handle
|
|
4072
4840
|
the request. Any handling node with a lower version will fail the request.
|
|
4073
|
-
:param min_score:
|
|
4074
|
-
`_score` are not included in the search results.
|
|
4075
|
-
:param pit:
|
|
4841
|
+
:param min_score: The minimum `_score` for matching documents. Documents with
|
|
4842
|
+
a lower `_score` are not included in the search results.
|
|
4843
|
+
:param pit: Limit the search to a point in time (PIT). If you provide a PIT,
|
|
4076
4844
|
you cannot specify an `<index>` in the request path.
|
|
4077
4845
|
:param post_filter: Use the `post_filter` parameter to filter search results.
|
|
4078
4846
|
The search hits are filtered after the aggregations are calculated. A post
|
|
4079
4847
|
filter has no impact on the aggregation results.
|
|
4080
|
-
:param pre_filter_shard_size:
|
|
4081
|
-
|
|
4082
|
-
|
|
4083
|
-
|
|
4084
|
-
|
|
4085
|
-
|
|
4086
|
-
|
|
4087
|
-
is met:
|
|
4088
|
-
or more read-only index
|
|
4848
|
+
:param pre_filter_shard_size: A threshold that enforces a pre-filter roundtrip
|
|
4849
|
+
to prefilter search shards based on query rewriting if the number of shards
|
|
4850
|
+
the search request expands to exceeds the threshold. This filter roundtrip
|
|
4851
|
+
can limit the number of shards significantly if for instance a shard can
|
|
4852
|
+
not match any documents based on its rewrite method (if date filters are
|
|
4853
|
+
mandatory to match but the shard bounds and the query are disjoint). When
|
|
4854
|
+
unspecified, the pre-filter phase is executed if any of these conditions
|
|
4855
|
+
is met: * The request targets more than 128 shards. * The request targets
|
|
4856
|
+
one or more read-only indices. * The primary sort of the query targets an indexed
|
|
4089
4857
|
field.
|
|
4090
|
-
:param preference:
|
|
4858
|
+
:param preference: The nodes and shards used for the search. By default, Elasticsearch
|
|
4091
4859
|
selects from eligible nodes and shards using adaptive replica selection,
|
|
4092
|
-
accounting for allocation awareness. Valid values are: `_only_local` to
|
|
4093
|
-
the search only on shards on the local node; `_local` to, if possible,
|
|
4094
|
-
the search on shards on the local node, or if not, select shards using
|
|
4095
|
-
default method; `_only_nodes:<node-id>,<node-id>` to run the search
|
|
4096
|
-
the specified nodes IDs, where, if suitable shards exist on more
|
|
4097
|
-
selected node, use shards on those nodes using the default method,
|
|
4098
|
-
none of the specified nodes are available, select shards from any available
|
|
4099
|
-
node using the default method; `_prefer_nodes:<node-id>,<node-id>` to if
|
|
4860
|
+
accounting for allocation awareness. Valid values are: * `_only_local` to
|
|
4861
|
+
run the search only on shards on the local node; * `_local` to, if possible,
|
|
4862
|
+
run the search on shards on the local node, or if not, select shards using
|
|
4863
|
+
the default method; * `_only_nodes:<node-id>,<node-id>` to run the search
|
|
4864
|
+
on only the specified node IDs, where, if suitable shards exist on more
|
|
4865
|
+
than one selected node, use shards on those nodes using the default method,
|
|
4866
|
+
or if none of the specified nodes are available, select shards from any available
|
|
4867
|
+
node using the default method; * `_prefer_nodes:<node-id>,<node-id>` to if
|
|
4100
4868
|
possible, run the search on the specified node IDs, or if not, select shards
|
|
4101
|
-
using the default method; `_shards:<shard>,<shard>` to run the search only
|
|
4102
|
-
on the specified shards; `<custom-string>` (any string that does not start
|
|
4869
|
+
using the default method; * `_shards:<shard>,<shard>` to run the search only
|
|
4870
|
+
on the specified shards; * `<custom-string>` (any string that does not start
|
|
4103
4871
|
with `_`) to route searches with the same `<custom-string>` to the same shards
|
|
4104
4872
|
in the same order.
|
|
4105
4873
|
:param profile: Set to `true` to return detailed timing information about the
|
|
4106
4874
|
execution of individual components in a search request. NOTE: This is a debugging
|
|
4107
4875
|
tool and adds significant overhead to search execution.
|
|
4108
|
-
:param q:
|
|
4109
|
-
|
|
4110
|
-
|
|
4111
|
-
|
|
4112
|
-
|
|
4876
|
+
:param q: A query in the Lucene query string syntax. Query parameter searches
|
|
4877
|
+
do not support the full Elasticsearch Query DSL but are handy for testing.
|
|
4878
|
+
IMPORTANT: This parameter overrides the query parameter in the request body.
|
|
4879
|
+
If both parameters are specified, documents matching the query request body
|
|
4880
|
+
parameter are not returned.
|
|
4881
|
+
:param query: The search definition using the Query DSL.
|
|
4882
|
+
:param rank: The Reciprocal Rank Fusion (RRF) to use.
|
|
4113
4883
|
:param request_cache: If `true`, the caching of search results is enabled for
|
|
4114
|
-
requests where `size` is `0`.
|
|
4884
|
+
requests where `size` is `0`. It defaults to index level settings.
|
|
4115
4885
|
:param rescore: Can be used to improve precision by reordering just the top (for
|
|
4116
4886
|
example 100 - 500) documents returned by the `query` and `post_filter` phases.
|
|
4117
4887
|
:param rest_total_hits_as_int: Indicates whether `hits.total` should be rendered
|
|
4118
4888
|
as an integer or an object in the rest search response.
|
|
4119
4889
|
:param retriever: A retriever is a specification to describe top documents returned
|
|
4120
4890
|
from a search. A retriever replaces other elements of the search API that
|
|
4121
|
-
also return top documents such as query and knn
|
|
4122
|
-
:param routing:
|
|
4123
|
-
|
|
4124
|
-
|
|
4891
|
+
also return top documents such as `query` and `knn`.
|
|
4892
|
+
:param routing: A custom value that is used to route operations to a specific
|
|
4893
|
+
shard.
|
|
4894
|
+
:param runtime_mappings: One or more runtime fields in the search request. These
|
|
4895
|
+
fields take precedence over mapped fields with the same name.
|
|
4125
4896
|
:param script_fields: Retrieve a script evaluation (based on different fields)
|
|
4126
4897
|
for each hit.
|
|
4127
|
-
:param scroll:
|
|
4128
|
-
|
|
4129
|
-
|
|
4898
|
+
:param scroll: The period to retain the search context for scrolling. By default,
|
|
4899
|
+
this value cannot exceed `1d` (24 hours). You can change this limit by using
|
|
4900
|
+
the `search.max_keep_alive` cluster-level setting.
|
|
4130
4901
|
:param search_after: Used to retrieve the next page of hits using a set of sort
|
|
4131
4902
|
values from the previous page.
|
|
4132
|
-
:param search_type:
|
|
4133
|
-
scoring.
|
|
4134
|
-
:param seq_no_primary_term: If `true`, returns sequence number and
|
|
4135
|
-
of the last modification of each hit.
|
|
4136
|
-
:param size: The number of hits to return. By default,
|
|
4137
|
-
more than 10,000 hits using the `from` and `size`
|
|
4138
|
-
more hits, use the `search_after`
|
|
4139
|
-
:param slice:
|
|
4140
|
-
|
|
4903
|
+
:param search_type: Indicates how distributed term frequencies are calculated
|
|
4904
|
+
for relevance scoring.
|
|
4905
|
+
:param seq_no_primary_term: If `true`, the request returns sequence number and
|
|
4906
|
+
primary term of the last modification of each hit.
|
|
4907
|
+
:param size: The number of hits to return, which must not be negative. By default,
|
|
4908
|
+
you cannot page through more than 10,000 hits using the `from` and `size`
|
|
4909
|
+
parameters. To page through more hits, use the `search_after` property.
|
|
4910
|
+
:param slice: Split a scrolled search into multiple slices that can be consumed
|
|
4911
|
+
independently.
|
|
4141
4912
|
:param sort: A comma-separated list of <field>:<direction> pairs.
|
|
4142
|
-
:param source:
|
|
4143
|
-
|
|
4913
|
+
:param source: The source fields that are returned for matching documents. These
|
|
4914
|
+
fields are returned in the `hits._source` property of the search response.
|
|
4915
|
+
If the `stored_fields` property is specified, the `_source` property defaults
|
|
4916
|
+
to `false`. Otherwise, it defaults to `true`.
|
|
4144
4917
|
:param source_excludes: A comma-separated list of source fields to exclude from
|
|
4145
4918
|
the response. You can also use this parameter to exclude fields from the
|
|
4146
4919
|
subset specified in `_source_includes` query parameter. If the `_source`
|
|
@@ -4150,45 +4923,46 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4150
4923
|
returned. You can exclude fields from this subset using the `_source_excludes`
|
|
4151
4924
|
query parameter. If the `_source` parameter is `false`, this parameter is
|
|
4152
4925
|
ignored.
|
|
4153
|
-
:param stats:
|
|
4926
|
+
:param stats: The stats groups to associate with the search. Each group maintains
|
|
4154
4927
|
a statistics aggregation for its associated searches. You can retrieve these
|
|
4155
4928
|
stats using the indices stats API.
|
|
4156
|
-
:param stored_fields:
|
|
4157
|
-
fields are specified, no stored fields are included in the
|
|
4158
|
-
field is specified, the `_source`
|
|
4159
|
-
pass `_source: true` to return both source fields and stored
|
|
4160
|
-
search response.
|
|
4929
|
+
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
4930
|
+
of a hit. If no fields are specified, no stored fields are included in the
|
|
4931
|
+
response. If this field is specified, the `_source` property defaults to
|
|
4932
|
+
`false`. You can pass `_source: true` to return both source fields and stored
|
|
4933
|
+
fields in the search response.
|
|
4161
4934
|
:param suggest: Defines a suggester that provides similar looking terms based
|
|
4162
4935
|
on a provided text.
|
|
4163
|
-
:param suggest_field:
|
|
4164
|
-
:param suggest_mode:
|
|
4165
|
-
|
|
4166
|
-
:param suggest_size:
|
|
4167
|
-
be used when the `suggest_field` and `suggest_text` query string parameters
|
|
4936
|
+
:param suggest_field: The field to use for suggestions.
|
|
4937
|
+
:param suggest_mode: The suggest mode. This parameter can be used only when the
|
|
4938
|
+
`suggest_field` and `suggest_text` query string parameters are specified.
|
|
4939
|
+
:param suggest_size: The number of suggestions to return. This parameter can
|
|
4940
|
+
be used only when the `suggest_field` and `suggest_text` query string parameters
|
|
4168
4941
|
are specified.
|
|
4169
4942
|
:param suggest_text: The source text for which the suggestions should be returned.
|
|
4170
|
-
This parameter can
|
|
4943
|
+
This parameter can be used only when the `suggest_field` and `suggest_text`
|
|
4171
4944
|
query string parameters are specified.
|
|
4172
|
-
:param terminate_after:
|
|
4945
|
+
:param terminate_after: The maximum number of documents to collect for each shard.
|
|
4173
4946
|
If a query reaches this limit, Elasticsearch terminates the query early.
|
|
4174
|
-
Elasticsearch collects documents before sorting. Use with caution.
|
|
4175
|
-
applies this
|
|
4176
|
-
let Elasticsearch perform early termination automatically. Avoid
|
|
4177
|
-
this
|
|
4178
|
-
across multiple data tiers. If set to `0` (default), the query does
|
|
4179
|
-
early.
|
|
4180
|
-
:param timeout:
|
|
4181
|
-
|
|
4182
|
-
|
|
4183
|
-
:param track_scores: If true
|
|
4184
|
-
scores are not used for sorting.
|
|
4947
|
+
Elasticsearch collects documents before sorting. IMPORTANT: Use with caution.
|
|
4948
|
+
Elasticsearch applies this property to each shard handling the request. When
|
|
4949
|
+
possible, let Elasticsearch perform early termination automatically. Avoid
|
|
4950
|
+
specifying this property for requests that target data streams with backing
|
|
4951
|
+
indices across multiple data tiers. If set to `0` (default), the query does
|
|
4952
|
+
not terminate early.
|
|
4953
|
+
:param timeout: The period of time to wait for a response from each shard. If
|
|
4954
|
+
no response is received before the timeout expires, the request fails and
|
|
4955
|
+
returns an error. Defaults to no timeout.
|
|
4956
|
+
:param track_scores: If `true`, calculate and return document scores, even if
|
|
4957
|
+
the scores are not used for sorting.
|
|
4185
4958
|
:param track_total_hits: Number of hits matching the query to count accurately.
|
|
4186
4959
|
If `true`, the exact number of hits is returned at the cost of some performance.
|
|
4187
4960
|
If `false`, the response does not include the total number of hits matching
|
|
4188
4961
|
the query.
|
|
4189
4962
|
:param typed_keys: If `true`, aggregation and suggester names are prefixed
|
|
4190
4963
|
by their respective types in the response.
|
|
4191
|
-
:param version: If true
|
|
4964
|
+
:param version: If `true`, the request returns the document version as part of
|
|
4965
|
+
a hit.
|
|
4192
4966
|
"""
|
|
4193
4967
|
__path_parts: t.Dict[str, str]
|
|
4194
4968
|
if index not in SKIP_IN_PATH:
|
|
@@ -4420,52 +5194,376 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4420
5194
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
4421
5195
|
) -> BinaryApiResponse:
|
|
4422
5196
|
"""
|
|
4423
|
-
|
|
4424
|
-
|
|
4425
|
-
|
|
5197
|
+
.. raw:: html
|
|
5198
|
+
|
|
5199
|
+
<p>Search a vector tile.</p>
|
|
5200
|
+
<p>Search a vector tile for geospatial values.
|
|
5201
|
+
Before using this API, you should be familiar with the Mapbox vector tile specification.
|
|
5202
|
+
The API returns results as a binary Mapbox vector tile.</p>
|
|
5203
|
+
<p>Internally, Elasticsearch translates a vector tile search API request into a search containing:</p>
|
|
5204
|
+
<ul>
|
|
5205
|
+
<li>A <code>geo_bounding_box</code> query on the <code>&lt;field&gt;</code>. The query uses the <code>&lt;zoom&gt;/&lt;x&gt;/&lt;y&gt;</code> tile as a bounding box.</li>
|
|
5206
|
+
<li>A <code>geotile_grid</code> or <code>geohex_grid</code> aggregation on the <code>&lt;field&gt;</code>. The <code>grid_agg</code> parameter determines the aggregation type. The aggregation uses the <code>&lt;zoom&gt;/&lt;x&gt;/&lt;y&gt;</code> tile as a bounding box.</li>
|
|
5207
|
+
<li>Optionally, a <code>geo_bounds</code> aggregation on the <code>&lt;field&gt;</code>. The search only includes this aggregation if the <code>exact_bounds</code> parameter is <code>true</code>.</li>
|
|
5208
|
+
<li>If the optional parameter <code>with_labels</code> is <code>true</code>, the internal search will include a dynamic runtime field that calls the <code>getLabelPosition</code> function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.</li>
|
|
5209
|
+
</ul>
|
|
5210
|
+
<p>For example, Elasticsearch may translate a vector tile search API request with a <code>grid_agg</code> argument of <code>geotile</code> and an <code>exact_bounds</code> argument of <code>true</code> into the following search:</p>
|
|
5211
|
+
<pre><code>GET my-index/_search
|
|
5212
|
+
{
|
|
5213
|
+
"size": 10000,
|
|
5214
|
+
"query": {
|
|
5215
|
+
"geo_bounding_box": {
|
|
5216
|
+
"my-geo-field": {
|
|
5217
|
+
"top_left": {
|
|
5218
|
+
"lat": -40.979898069620134,
|
|
5219
|
+
"lon": -45
|
|
5220
|
+
},
|
|
5221
|
+
"bottom_right": {
|
|
5222
|
+
"lat": -66.51326044311186,
|
|
5223
|
+
"lon": 0
|
|
5224
|
+
}
|
|
5225
|
+
}
|
|
5226
|
+
}
|
|
5227
|
+
},
|
|
5228
|
+
"aggregations": {
|
|
5229
|
+
"grid": {
|
|
5230
|
+
"geotile_grid": {
|
|
5231
|
+
"field": "my-geo-field",
|
|
5232
|
+
"precision": 11,
|
|
5233
|
+
"size": 65536,
|
|
5234
|
+
"bounds": {
|
|
5235
|
+
"top_left": {
|
|
5236
|
+
"lat": -40.979898069620134,
|
|
5237
|
+
"lon": -45
|
|
5238
|
+
},
|
|
5239
|
+
"bottom_right": {
|
|
5240
|
+
"lat": -66.51326044311186,
|
|
5241
|
+
"lon": 0
|
|
5242
|
+
}
|
|
5243
|
+
}
|
|
5244
|
+
}
|
|
5245
|
+
},
|
|
5246
|
+
"bounds": {
|
|
5247
|
+
"geo_bounds": {
|
|
5248
|
+
"field": "my-geo-field",
|
|
5249
|
+
"wrap_longitude": false
|
|
5250
|
+
}
|
|
5251
|
+
}
|
|
5252
|
+
}
|
|
5253
|
+
}
|
|
5254
|
+
</code></pre>
|
|
5255
|
+
<p>The API returns results as a binary Mapbox vector tile.
|
|
5256
|
+
Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:</p>
|
|
5257
|
+
<ul>
|
|
5258
|
+
<li>A <code>hits</code> layer containing a feature for each <code>&lt;field&gt;</code> value matching the <code>geo_bounding_box</code> query.</li>
|
|
5259
|
+
<li>An <code>aggs</code> layer containing a feature for each cell of the <code>geotile_grid</code> or <code>geohex_grid</code>. The layer only contains features for cells with matching data.</li>
|
|
5260
|
+
<li>A meta layer containing:
|
|
5261
|
+
<ul>
|
|
5262
|
+
<li>A feature containing a bounding box. By default, this is the bounding box of the tile.</li>
|
|
5263
|
+
<li>Value ranges for any sub-aggregations on the <code>geotile_grid</code> or <code>geohex_grid</code>.</li>
|
|
5264
|
+
<li>Metadata for the search.</li>
|
|
5265
|
+
</ul>
|
|
5266
|
+
</li>
|
|
5267
|
+
</ul>
|
|
5268
|
+
<p>The API only returns features that can display at its zoom level.
|
|
5269
|
+
For example, if a polygon feature has no area at its zoom level, the API omits it.
|
|
5270
|
+
The API returns errors as UTF-8 encoded JSON.</p>
|
|
5271
|
+
<p>IMPORTANT: You can specify several options for this API as either a query parameter or request body parameter.
|
|
5272
|
+
If you specify both parameters, the query parameter takes precedence.</p>
|
|
5273
|
+
<p><strong>Grid precision for geotile</strong></p>
|
|
5274
|
+
<p>For a <code>grid_agg</code> of <code>geotile</code>, you can use cells in the <code>aggs</code> layer as tiles for lower zoom levels.
|
|
5275
|
+
<code>grid_precision</code> represents the additional zoom levels available through these cells. The final precision is computed as follows: <code>&lt;zoom&gt; + grid_precision</code>.
|
|
5276
|
+
For example, if <code>&lt;zoom&gt;</code> is 7 and <code>grid_precision</code> is 8, then the <code>geotile_grid</code> aggregation will use a precision of 15.
|
|
5277
|
+
The maximum final precision is 29.
|
|
5278
|
+
The <code>grid_precision</code> also determines the number of cells for the grid as follows: <code>(2^grid_precision) x (2^grid_precision)</code>.
|
|
5279
|
+
For example, a value of 8 divides the tile into a grid of 256 x 256 cells.
|
|
5280
|
+
The <code>aggs</code> layer only contains features for cells with matching data.</p>
|
|
5281
|
+
<p><strong>Grid precision for geohex</strong></p>
|
|
5282
|
+
<p>For a <code>grid_agg</code> of <code>geohex</code>, Elasticsearch uses <code>&lt;zoom&gt;</code> and <code>grid_precision</code> to calculate a final precision as follows: <code>&lt;zoom&gt; + grid_precision</code>.</p>
|
|
5283
|
+
<p>This precision determines the H3 resolution of the hexagonal cells produced by the <code>geohex</code> aggregation.
|
|
5284
|
+
The following table maps the H3 resolution for each precision.
|
|
5285
|
+
For example, if <code>&lt;zoom&gt;</code> is 3 and <code>grid_precision</code> is 3, the precision is 6.
|
|
5286
|
+
At a precision of 6, hexagonal cells have an H3 resolution of 2.
|
|
5287
|
+
If <code>&lt;zoom&gt;</code> is 3 and <code>grid_precision</code> is 4, the precision is 7.
|
|
5288
|
+
At a precision of 7, hexagonal cells have an H3 resolution of 3.</p>
|
|
5289
|
+
<table>
|
|
5290
|
+
<thead>
|
|
5291
|
+
<tr>
|
|
5292
|
+
<th>Precision</th>
|
|
5293
|
+
<th>Unique tile bins</th>
|
|
5294
|
+
<th>H3 resolution</th>
|
|
5295
|
+
<th>Unique hex bins</th>
|
|
5296
|
+
<th>Ratio</th>
|
|
5297
|
+
</tr>
|
|
5298
|
+
</thead>
|
|
5299
|
+
<tbody>
|
|
5300
|
+
<tr>
|
|
5301
|
+
<td>1</td>
|
|
5302
|
+
<td>4</td>
|
|
5303
|
+
<td>0</td>
|
|
5304
|
+
<td>122</td>
|
|
5305
|
+
<td>30.5</td>
|
|
5306
|
+
</tr>
|
|
5307
|
+
<tr>
|
|
5308
|
+
<td>2</td>
|
|
5309
|
+
<td>16</td>
|
|
5310
|
+
<td>0</td>
|
|
5311
|
+
<td>122</td>
|
|
5312
|
+
<td>7.625</td>
|
|
5313
|
+
</tr>
|
|
5314
|
+
<tr>
|
|
5315
|
+
<td>3</td>
|
|
5316
|
+
<td>64</td>
|
|
5317
|
+
<td>1</td>
|
|
5318
|
+
<td>842</td>
|
|
5319
|
+
<td>13.15625</td>
|
|
5320
|
+
</tr>
|
|
5321
|
+
<tr>
|
|
5322
|
+
<td>4</td>
|
|
5323
|
+
<td>256</td>
|
|
5324
|
+
<td>1</td>
|
|
5325
|
+
<td>842</td>
|
|
5326
|
+
<td>3.2890625</td>
|
|
5327
|
+
</tr>
|
|
5328
|
+
<tr>
|
|
5329
|
+
<td>5</td>
|
|
5330
|
+
<td>1024</td>
|
|
5331
|
+
<td>2</td>
|
|
5332
|
+
<td>5882</td>
|
|
5333
|
+
<td>5.744140625</td>
|
|
5334
|
+
</tr>
|
|
5335
|
+
<tr>
|
|
5336
|
+
<td>6</td>
|
|
5337
|
+
<td>4096</td>
|
|
5338
|
+
<td>2</td>
|
|
5339
|
+
<td>5882</td>
|
|
5340
|
+
<td>1.436035156</td>
|
|
5341
|
+
</tr>
|
|
5342
|
+
<tr>
|
|
5343
|
+
<td>7</td>
|
|
5344
|
+
<td>16384</td>
|
|
5345
|
+
<td>3</td>
|
|
5346
|
+
<td>41162</td>
|
|
5347
|
+
<td>2.512329102</td>
|
|
5348
|
+
</tr>
|
|
5349
|
+
<tr>
|
|
5350
|
+
<td>8</td>
|
|
5351
|
+
<td>65536</td>
|
|
5352
|
+
<td>3</td>
|
|
5353
|
+
<td>41162</td>
|
|
5354
|
+
<td>0.6280822754</td>
|
|
5355
|
+
</tr>
|
|
5356
|
+
<tr>
|
|
5357
|
+
<td>9</td>
|
|
5358
|
+
<td>262144</td>
|
|
5359
|
+
<td>4</td>
|
|
5360
|
+
<td>288122</td>
|
|
5361
|
+
<td>1.099098206</td>
|
|
5362
|
+
</tr>
|
|
5363
|
+
<tr>
|
|
5364
|
+
<td>10</td>
|
|
5365
|
+
<td>1048576</td>
|
|
5366
|
+
<td>4</td>
|
|
5367
|
+
<td>288122</td>
|
|
5368
|
+
<td>0.2747745514</td>
|
|
5369
|
+
</tr>
|
|
5370
|
+
<tr>
|
|
5371
|
+
<td>11</td>
|
|
5372
|
+
<td>4194304</td>
|
|
5373
|
+
<td>5</td>
|
|
5374
|
+
<td>2016842</td>
|
|
5375
|
+
<td>0.4808526039</td>
|
|
5376
|
+
</tr>
|
|
5377
|
+
<tr>
|
|
5378
|
+
<td>12</td>
|
|
5379
|
+
<td>16777216</td>
|
|
5380
|
+
<td>6</td>
|
|
5381
|
+
<td>14117882</td>
|
|
5382
|
+
<td>0.8414913416</td>
|
|
5383
|
+
</tr>
|
|
5384
|
+
<tr>
|
|
5385
|
+
<td>13</td>
|
|
5386
|
+
<td>67108864</td>
|
|
5387
|
+
<td>6</td>
|
|
5388
|
+
<td>14117882</td>
|
|
5389
|
+
<td>0.2103728354</td>
|
|
5390
|
+
</tr>
|
|
5391
|
+
<tr>
|
|
5392
|
+
<td>14</td>
|
|
5393
|
+
<td>268435456</td>
|
|
5394
|
+
<td>7</td>
|
|
5395
|
+
<td>98825162</td>
|
|
5396
|
+
<td>0.3681524172</td>
|
|
5397
|
+
</tr>
|
|
5398
|
+
<tr>
|
|
5399
|
+
<td>15</td>
|
|
5400
|
+
<td>1073741824</td>
|
|
5401
|
+
<td>8</td>
|
|
5402
|
+
<td>691776122</td>
|
|
5403
|
+
<td>0.644266719</td>
|
|
5404
|
+
</tr>
|
|
5405
|
+
<tr>
|
|
5406
|
+
<td>16</td>
|
|
5407
|
+
<td>4294967296</td>
|
|
5408
|
+
<td>8</td>
|
|
5409
|
+
<td>691776122</td>
|
|
5410
|
+
<td>0.1610666797</td>
|
|
5411
|
+
</tr>
|
|
5412
|
+
<tr>
|
|
5413
|
+
<td>17</td>
|
|
5414
|
+
<td>17179869184</td>
|
|
5415
|
+
<td>9</td>
|
|
5416
|
+
<td>4842432842</td>
|
|
5417
|
+
<td>0.2818666889</td>
|
|
5418
|
+
</tr>
|
|
5419
|
+
<tr>
|
|
5420
|
+
<td>18</td>
|
|
5421
|
+
<td>68719476736</td>
|
|
5422
|
+
<td>10</td>
|
|
5423
|
+
<td>33897029882</td>
|
|
5424
|
+
<td>0.4932667053</td>
|
|
5425
|
+
</tr>
|
|
5426
|
+
<tr>
|
|
5427
|
+
<td>19</td>
|
|
5428
|
+
<td>274877906944</td>
|
|
5429
|
+
<td>11</td>
|
|
5430
|
+
<td>237279209162</td>
|
|
5431
|
+
<td>0.8632167343</td>
|
|
5432
|
+
</tr>
|
|
5433
|
+
<tr>
|
|
5434
|
+
<td>20</td>
|
|
5435
|
+
<td>1099511627776</td>
|
|
5436
|
+
<td>11</td>
|
|
5437
|
+
<td>237279209162</td>
|
|
5438
|
+
<td>0.2158041836</td>
|
|
5439
|
+
</tr>
|
|
5440
|
+
<tr>
|
|
5441
|
+
<td>21</td>
|
|
5442
|
+
<td>4398046511104</td>
|
|
5443
|
+
<td>12</td>
|
|
5444
|
+
<td>1660954464122</td>
|
|
5445
|
+
<td>0.3776573213</td>
|
|
5446
|
+
</tr>
|
|
5447
|
+
<tr>
|
|
5448
|
+
<td>22</td>
|
|
5449
|
+
<td>17592186044416</td>
|
|
5450
|
+
<td>13</td>
|
|
5451
|
+
<td>11626681248842</td>
|
|
5452
|
+
<td>0.6609003122</td>
|
|
5453
|
+
</tr>
|
|
5454
|
+
<tr>
|
|
5455
|
+
<td>23</td>
|
|
5456
|
+
<td>70368744177664</td>
|
|
5457
|
+
<td>13</td>
|
|
5458
|
+
<td>11626681248842</td>
|
|
5459
|
+
<td>0.165225078</td>
|
|
5460
|
+
</tr>
|
|
5461
|
+
<tr>
|
|
5462
|
+
<td>24</td>
|
|
5463
|
+
<td>281474976710656</td>
|
|
5464
|
+
<td>14</td>
|
|
5465
|
+
<td>81386768741882</td>
|
|
5466
|
+
<td>0.2891438866</td>
|
|
5467
|
+
</tr>
|
|
5468
|
+
<tr>
|
|
5469
|
+
<td>25</td>
|
|
5470
|
+
<td>1125899906842620</td>
|
|
5471
|
+
<td>15</td>
|
|
5472
|
+
<td>569707381193162</td>
|
|
5473
|
+
<td>0.5060018015</td>
|
|
5474
|
+
</tr>
|
|
5475
|
+
<tr>
|
|
5476
|
+
<td>26</td>
|
|
5477
|
+
<td>4503599627370500</td>
|
|
5478
|
+
<td>15</td>
|
|
5479
|
+
<td>569707381193162</td>
|
|
5480
|
+
<td>0.1265004504</td>
|
|
5481
|
+
</tr>
|
|
5482
|
+
<tr>
|
|
5483
|
+
<td>27</td>
|
|
5484
|
+
<td>18014398509482000</td>
|
|
5485
|
+
<td>15</td>
|
|
5486
|
+
<td>569707381193162</td>
|
|
5487
|
+
<td>0.03162511259</td>
|
|
5488
|
+
</tr>
|
|
5489
|
+
<tr>
|
|
5490
|
+
<td>28</td>
|
|
5491
|
+
<td>72057594037927900</td>
|
|
5492
|
+
<td>15</td>
|
|
5493
|
+
<td>569707381193162</td>
|
|
5494
|
+
<td>0.007906278149</td>
|
|
5495
|
+
</tr>
|
|
5496
|
+
<tr>
|
|
5497
|
+
<td>29</td>
|
|
5498
|
+
<td>288230376151712000</td>
|
|
5499
|
+
<td>15</td>
|
|
5500
|
+
<td>569707381193162</td>
|
|
5501
|
+
<td>0.001976569537</td>
|
|
5502
|
+
</tr>
|
|
5503
|
+
</tbody>
|
|
5504
|
+
</table>
|
|
5505
|
+
<p>Hexagonal cells don't align perfectly on a vector tile.
|
|
5506
|
+
Some cells may intersect more than one vector tile.
|
|
5507
|
+
To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level.
|
|
5508
|
+
Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.</p>
|
|
5509
|
+
|
|
5510
|
+
|
|
5511
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-vector-tile-api.html>`_
|
|
4426
5512
|
|
|
4427
5513
|
:param index: Comma-separated list of data streams, indices, or aliases to search
|
|
4428
5514
|
:param field: Field containing geospatial data to return
|
|
4429
5515
|
:param zoom: Zoom level for the vector tile to search
|
|
4430
5516
|
:param x: X coordinate for the vector tile to search
|
|
4431
5517
|
:param y: Y coordinate for the vector tile to search
|
|
4432
|
-
:param aggs: Sub-aggregations for the geotile_grid.
|
|
4433
|
-
types: - avg -
|
|
4434
|
-
|
|
4435
|
-
|
|
4436
|
-
|
|
4437
|
-
:param
|
|
4438
|
-
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4442
|
-
|
|
5518
|
+
:param aggs: Sub-aggregations for the geotile_grid. It supports the following
|
|
5519
|
+
aggregation types: - `avg` - `boxplot` - `cardinality` - `extended stats`
|
|
5520
|
+
- `max` - `median absolute deviation` - `min` - `percentile` - `percentile-rank`
|
|
5521
|
+
- `stats` - `sum` - `value count` The aggregation names can't start with
|
|
5522
|
+
`_mvt_`. The `_mvt_` prefix is reserved for internal aggregations.
|
|
5523
|
+
:param buffer: The size, in pixels, of a clipping buffer outside the tile. This
|
|
5524
|
+
allows renderers to avoid outline artifacts from geometries that extend past
|
|
5525
|
+
the extent of the tile.
|
|
5526
|
+
:param exact_bounds: If `false`, the meta layer's feature is the bounding box
|
|
5527
|
+
of the tile. If `true`, the meta layer's feature is a bounding box resulting
|
|
5528
|
+
from a `geo_bounds` aggregation. The aggregation runs on `<field>` values that
|
|
5529
|
+
intersect the `<zoom>/<x>/<y>` tile with `wrap_longitude` set to `false`.
|
|
5530
|
+
The resulting bounding box may be larger than the vector tile.
|
|
5531
|
+
:param extent: The size, in pixels, of a side of the tile. Vector tiles are square
|
|
4443
5532
|
with equal sides.
|
|
4444
|
-
:param fields:
|
|
4445
|
-
This parameter does not support fields with array values. Fields with
|
|
4446
|
-
values may return inconsistent results.
|
|
4447
|
-
:param grid_agg:
|
|
5533
|
+
:param fields: The fields to return in the `hits` layer. It supports wildcards
|
|
5534
|
+
(`*`). This parameter does not support fields with array values. Fields with
|
|
5535
|
+
array values may return inconsistent results.
|
|
5536
|
+
:param grid_agg: The aggregation used to create a grid for the `field`.
|
|
4448
5537
|
:param grid_precision: Additional zoom levels available through the aggs layer.
|
|
4449
|
-
For example, if
|
|
4450
|
-
level 15. Accepts 0-8. If 0, results don
|
|
5538
|
+
For example, if `<zoom>` is `7` and `grid_precision` is `8`, you can zoom
|
|
5539
|
+
in up to level 15. Accepts 0-8. If 0, results don't include the aggs layer.
|
|
4451
5540
|
:param grid_type: Determines the geometry type for features in the aggs layer.
|
|
4452
|
-
In the aggs layer, each feature represents a geotile_grid cell. If
|
|
4453
|
-
each feature is a
|
|
5541
|
+
In the aggs layer, each feature represents a `geotile_grid` cell. If `grid`,
|
|
5542
|
+
each feature is a polygon of the cell's bounding box. If `point`, each feature
|
|
4454
5543
|
is a Point that is the centroid of the cell.
|
|
4455
|
-
:param query:
|
|
5544
|
+
:param query: The query DSL used to filter documents for the search.
|
|
4456
5545
|
:param runtime_mappings: Defines one or more runtime fields in the search request.
|
|
4457
5546
|
These fields take precedence over mapped fields with the same name.
|
|
4458
|
-
:param size:
|
|
4459
|
-
0-10000. If 0, results don
|
|
4460
|
-
:param sort:
|
|
4461
|
-
a bounding box for each feature. It sorts features based on this box
|
|
5547
|
+
:param size: The maximum number of features to return in the hits layer. Accepts
|
|
5548
|
+
0-10000. If 0, results don't include the hits layer.
|
|
5549
|
+
:param sort: Sort the features in the hits layer. By default, the API calculates
|
|
5550
|
+
a bounding box for each feature. It sorts features based on this box's diagonal
|
|
4462
5551
|
length, from longest to shortest.
|
|
4463
|
-
:param track_total_hits:
|
|
5552
|
+
:param track_total_hits: The number of hits matching the query to count accurately.
|
|
4464
5553
|
If `true`, the exact number of hits is returned at the cost of some performance.
|
|
4465
5554
|
If `false`, the response does not include the total number of hits matching
|
|
4466
5555
|
the query.
|
|
4467
5556
|
:param with_labels: If `true`, the hits and aggs layers will contain additional
|
|
4468
5557
|
point features representing suggested label positions for the original features.
|
|
5558
|
+
* `Point` and `MultiPoint` features will have one of the points selected.
|
|
5559
|
+
* `Polygon` and `MultiPolygon` features will have a single point generated,
|
|
5560
|
+
either the centroid, if it is within the polygon, or another point within
|
|
5561
|
+
the polygon selected from the sorted triangle-tree. * `LineString` features
|
|
5562
|
+
will likewise provide a roughly central point selected from the triangle-tree.
|
|
5563
|
+
* The aggregation results will provide one central point for each aggregation
|
|
5564
|
+
bucket. All attributes from the original features will also be copied to
|
|
5565
|
+
the new label features. In addition, the new features will be distinguishable
|
|
5566
|
+
using the tag `_mvt_label_position`.
|
|
4469
5567
|
"""
|
|
4470
5568
|
if index in SKIP_IN_PATH:
|
|
4471
5569
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -4569,20 +5667,26 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4569
5667
|
human: t.Optional[bool] = None,
|
|
4570
5668
|
ignore_unavailable: t.Optional[bool] = None,
|
|
4571
5669
|
local: t.Optional[bool] = None,
|
|
5670
|
+
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
4572
5671
|
preference: t.Optional[str] = None,
|
|
4573
5672
|
pretty: t.Optional[bool] = None,
|
|
4574
5673
|
routing: t.Optional[str] = None,
|
|
4575
5674
|
) -> ObjectApiResponse[t.Any]:
|
|
4576
5675
|
"""
|
|
4577
|
-
|
|
4578
|
-
|
|
4579
|
-
|
|
4580
|
-
|
|
5676
|
+
.. raw:: html
|
|
5677
|
+
|
|
5678
|
+
<p>Get the search shards.</p>
|
|
5679
|
+
<p>Get the indices and shards that a search request would be run against.
|
|
5680
|
+
This information can be useful for working out issues or planning optimizations with routing and shard preferences.
|
|
5681
|
+
When filtered aliases are used, the filter is returned as part of the <code>indices</code> section.</p>
|
|
5682
|
+
<p>If the Elasticsearch security features are enabled, you must have the <code>view_index_metadata</code> or <code>manage</code> index privilege for the target data stream, index, or alias.</p>
|
|
4581
5683
|
|
|
4582
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.17/search-shards.html>`_
|
|
4583
5684
|
|
|
4584
|
-
|
|
4585
|
-
|
|
5685
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-shards.html>`_
|
|
5686
|
+
|
|
5687
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
5688
|
+
search. It supports wildcards (`*`). To search all data streams and indices,
|
|
5689
|
+
omit this parameter or use `*` or `_all`.
|
|
4586
5690
|
:param allow_no_indices: If `false`, the request returns an error if any wildcard
|
|
4587
5691
|
expression, index alias, or `_all` value targets only missing or closed indices.
|
|
4588
5692
|
This behavior applies even if the request targets other open indices. For
|
|
@@ -4596,9 +5700,13 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4596
5700
|
a missing or closed index.
|
|
4597
5701
|
:param local: If `true`, the request retrieves information from the local node
|
|
4598
5702
|
only.
|
|
4599
|
-
:param
|
|
4600
|
-
|
|
4601
|
-
|
|
5703
|
+
:param master_timeout: The period to wait for a connection to the master node.
|
|
5704
|
+
If the master node is not available before the timeout expires, the request
|
|
5705
|
+
fails and returns an error. It can also be set to `-1` to indicate that the
|
|
5706
|
+
request should never time out.
|
|
5707
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
5708
|
+
is random by default.
|
|
5709
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
4602
5710
|
"""
|
|
4603
5711
|
__path_parts: t.Dict[str, str]
|
|
4604
5712
|
if index not in SKIP_IN_PATH:
|
|
@@ -4622,6 +5730,8 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4622
5730
|
__query["ignore_unavailable"] = ignore_unavailable
|
|
4623
5731
|
if local is not None:
|
|
4624
5732
|
__query["local"] = local
|
|
5733
|
+
if master_timeout is not None:
|
|
5734
|
+
__query["master_timeout"] = master_timeout
|
|
4625
5735
|
if preference is not None:
|
|
4626
5736
|
__query["preference"] = preference
|
|
4627
5737
|
if pretty is not None:
|
|
@@ -4678,12 +5788,15 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4678
5788
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
4679
5789
|
) -> ObjectApiResponse[t.Any]:
|
|
4680
5790
|
"""
|
|
4681
|
-
|
|
5791
|
+
.. raw:: html
|
|
4682
5792
|
|
|
4683
|
-
|
|
5793
|
+
<p>Run a search with a search template.</p>
|
|
4684
5794
|
|
|
4685
|
-
|
|
4686
|
-
|
|
5795
|
+
|
|
5796
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-template-api.html>`_
|
|
5797
|
+
|
|
5798
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
5799
|
+
search. It supports wildcards (`*`).
|
|
4687
5800
|
:param allow_no_indices: If `false`, the request returns an error if any wildcard
|
|
4688
5801
|
expression, index alias, or `_all` value targets only missing or closed indices.
|
|
4689
5802
|
This behavior applies even if the request targets other open indices. For
|
|
@@ -4691,32 +5804,34 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4691
5804
|
with `foo` but no index starts with `bar`.
|
|
4692
5805
|
:param ccs_minimize_roundtrips: If `true`, network round-trips are minimized
|
|
4693
5806
|
for cross-cluster search requests.
|
|
4694
|
-
:param expand_wildcards:
|
|
4695
|
-
request can target data streams, this argument determines whether
|
|
4696
|
-
expressions match hidden data streams. Supports comma-separated
|
|
4697
|
-
as `open,hidden`. Valid values are: `all`, `open`, `closed`,
|
|
5807
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
5808
|
+
If the request can target data streams, this argument determines whether
|
|
5809
|
+
wildcard expressions match hidden data streams. Supports comma-separated
|
|
5810
|
+
values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`,
|
|
5811
|
+
`hidden`, `none`.
|
|
4698
5812
|
:param explain: If `true`, returns detailed information about score calculation
|
|
4699
|
-
as part of each hit.
|
|
4700
|
-
|
|
4701
|
-
|
|
5813
|
+
as part of each hit. If you specify both this and the `explain` query parameter,
|
|
5814
|
+
the API uses only the query parameter.
|
|
5815
|
+
:param id: The ID of the search template to use. If no `source` is specified,
|
|
5816
|
+
this parameter is required.
|
|
4702
5817
|
:param ignore_throttled: If `true`, specified concrete, expanded, or aliased
|
|
4703
5818
|
indices are not included in the response when throttled.
|
|
4704
5819
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
4705
5820
|
a missing or closed index.
|
|
4706
5821
|
:param params: Key-value pairs used to replace Mustache variables in the template.
|
|
4707
5822
|
The key is the variable name. The value is the variable value.
|
|
4708
|
-
:param preference:
|
|
4709
|
-
|
|
5823
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
5824
|
+
is random by default.
|
|
4710
5825
|
:param profile: If `true`, the query execution is profiled.
|
|
4711
|
-
:param rest_total_hits_as_int: If true
|
|
4712
|
-
in the response.
|
|
4713
|
-
:param routing:
|
|
5826
|
+
:param rest_total_hits_as_int: If `true`, `hits.total` is rendered as an integer
|
|
5827
|
+
in the response. If `false`, it is rendered as an object.
|
|
5828
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
4714
5829
|
:param scroll: Specifies how long a consistent view of the index should be maintained
|
|
4715
5830
|
for scrolled search.
|
|
4716
5831
|
:param search_type: The type of the search operation.
|
|
4717
5832
|
:param source: An inline search template. Supports the same parameters as the
|
|
4718
|
-
search API's request body.
|
|
4719
|
-
specified, this parameter is required.
|
|
5833
|
+
search API's request body. It also supports Mustache variables. If no `id`
|
|
5834
|
+
is specified, this parameter is required.
|
|
4720
5835
|
:param typed_keys: If `true`, the response prefixes aggregation and suggester
|
|
4721
5836
|
names with their respective types.
|
|
4722
5837
|
"""
|
|
@@ -4810,34 +5925,39 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4810
5925
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
4811
5926
|
) -> ObjectApiResponse[t.Any]:
|
|
4812
5927
|
"""
|
|
4813
|
-
|
|
4814
|
-
This "terms enum" API is designed for low-latency look-ups used in auto-complete
|
|
4815
|
-
scenarios. If the `complete` property in the response is false, the returned
|
|
4816
|
-
terms set may be incomplete and should be treated as approximate. This can occur
|
|
4817
|
-
due to a few reasons, such as a request timeout or a node error. NOTE: The terms
|
|
4818
|
-
enum API may return terms from deleted documents. Deleted documents are initially
|
|
4819
|
-
only marked as deleted. It is not until their segments are merged that documents
|
|
4820
|
-
are actually deleted. Until that happens, the terms enum API will return terms
|
|
4821
|
-
from these documents.
|
|
4822
|
-
|
|
4823
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.17/search-terms-enum.html>`_
|
|
5928
|
+
.. raw:: html
|
|
4824
5929
|
|
|
4825
|
-
|
|
4826
|
-
|
|
5930
|
+
<p>Get terms in an index.</p>
|
|
5931
|
+
<p>Discover terms that match a partial string in an index.
|
|
5932
|
+
This API is designed for low-latency look-ups used in auto-complete scenarios.</p>
|
|
5933
|
+
<blockquote>
|
|
5934
|
+
<p>NOTE:
|
|
5935
|
+
The terms enum API may return terms from deleted documents. Deleted documents are initially only marked as deleted. It is not until their segments are merged that documents are actually deleted. Until that happens, the terms enum API will return terms from these documents.</p>
|
|
5936
|
+
</blockquote>
|
|
5937
|
+
|
|
5938
|
+
|
|
5939
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/search-terms-enum.html>`_
|
|
5940
|
+
|
|
5941
|
+
:param index: A comma-separated list of data streams, indices, and index aliases
|
|
5942
|
+
to search. Wildcard (`*`) expressions are supported. To search all data streams
|
|
5943
|
+
or indices, omit this parameter or use `*` or `_all`.
|
|
4827
5944
|
:param field: The string to match at the start of indexed terms. If not provided,
|
|
4828
5945
|
all terms in the field are considered.
|
|
4829
|
-
:param case_insensitive: When true the provided search string is matched against
|
|
5946
|
+
:param case_insensitive: When `true`, the provided search string is matched against
|
|
4830
5947
|
index terms without case sensitivity.
|
|
4831
|
-
:param index_filter:
|
|
4832
|
-
|
|
4833
|
-
:param search_after:
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
|
|
4840
|
-
|
|
5948
|
+
:param index_filter: Filter an index shard if the provided query rewrites to
|
|
5949
|
+
`match_none`.
|
|
5950
|
+
:param search_after: The string after which terms in the index should be returned.
|
|
5951
|
+
It allows for a form of pagination if the last result from one request is
|
|
5952
|
+
passed as the `search_after` parameter for a subsequent request.
|
|
5953
|
+
:param size: The number of matching terms to return.
|
|
5954
|
+
:param string: The string to match at the start of indexed terms. If it is not
|
|
5955
|
+
provided, all terms in the field are considered. NOTE: The prefix string
|
|
5956
|
+
cannot be larger than the largest possible keyword value, which is Lucene's
|
|
5957
|
+
term byte-length limit of 32766.
|
|
5958
|
+
:param timeout: The maximum length of time to spend collecting results. If the
|
|
5959
|
+
timeout is exceeded, the `complete` flag is set to `false` in the response and
|
|
5960
|
+
the results may be partial or empty.
|
|
4841
5961
|
"""
|
|
4842
5962
|
if index in SKIP_IN_PATH:
|
|
4843
5963
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -4886,7 +6006,20 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4886
6006
|
)
|
|
4887
6007
|
|
|
4888
6008
|
@_rewrite_parameters(
|
|
4889
|
-
body_fields=(
|
|
6009
|
+
body_fields=(
|
|
6010
|
+
"doc",
|
|
6011
|
+
"field_statistics",
|
|
6012
|
+
"fields",
|
|
6013
|
+
"filter",
|
|
6014
|
+
"offsets",
|
|
6015
|
+
"payloads",
|
|
6016
|
+
"per_field_analyzer",
|
|
6017
|
+
"positions",
|
|
6018
|
+
"routing",
|
|
6019
|
+
"term_statistics",
|
|
6020
|
+
"version",
|
|
6021
|
+
"version_type",
|
|
6022
|
+
),
|
|
4890
6023
|
)
|
|
4891
6024
|
async def termvectors(
|
|
4892
6025
|
self,
|
|
@@ -4916,33 +6049,77 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4916
6049
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
4917
6050
|
) -> ObjectApiResponse[t.Any]:
|
|
4918
6051
|
"""
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
|
|
4925
|
-
|
|
6052
|
+
.. raw:: html
|
|
6053
|
+
|
|
6054
|
+
<p>Get term vector information.</p>
|
|
6055
|
+
<p>Get information and statistics about terms in the fields of a particular document.</p>
|
|
6056
|
+
<p>You can retrieve term vectors for documents stored in the index or for artificial documents passed in the body of the request.
|
|
6057
|
+
You can specify the fields you are interested in through the <code>fields</code> parameter or by adding the fields to the request body.
|
|
6058
|
+
For example:</p>
|
|
6059
|
+
<pre><code>GET /my-index-000001/_termvectors/1?fields=message
|
|
6060
|
+
</code></pre>
|
|
6061
|
+
<p>Fields can be specified using wildcards, similar to the multi match query.</p>
|
|
6062
|
+
<p>Term vectors are real-time by default, not near real-time.
|
|
6063
|
+
This can be changed by setting <code>realtime</code> parameter to <code>false</code>.</p>
|
|
6064
|
+
<p>You can request three types of values: <em>term information</em>, <em>term statistics</em>, and <em>field statistics</em>.
|
|
6065
|
+
By default, all term information and field statistics are returned for all fields but term statistics are excluded.</p>
|
|
6066
|
+
<p><strong>Term information</strong></p>
|
|
6067
|
+
<ul>
|
|
6068
|
+
<li>term frequency in the field (always returned)</li>
|
|
6069
|
+
<li>term positions (<code>positions: true</code>)</li>
|
|
6070
|
+
<li>start and end offsets (<code>offsets: true</code>)</li>
|
|
6071
|
+
<li>term payloads (<code>payloads: true</code>), as base64 encoded bytes</li>
|
|
6072
|
+
</ul>
|
|
6073
|
+
<p>If the requested information wasn't stored in the index, it will be computed on the fly if possible.
|
|
6074
|
+
Additionally, term vectors could be computed for documents not even existing in the index, but instead provided by the user.</p>
|
|
6075
|
+
<blockquote>
|
|
6076
|
+
<p>WARNING:
|
|
6077
|
+
Start and end offsets assume UTF-16 encoding is being used. If you want to use these offsets in order to get the original text that produced this token, you should make sure that the string you are taking a sub-string of is also encoded using UTF-16.</p>
|
|
6078
|
+
</blockquote>
|
|
6079
|
+
<p><strong>Behaviour</strong></p>
|
|
6080
|
+
<p>The term and field statistics are not accurate.
|
|
6081
|
+
Deleted documents are not taken into account.
|
|
6082
|
+
The information is only retrieved for the shard the requested document resides in.
|
|
6083
|
+
The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context.
|
|
6084
|
+
By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected.
|
|
6085
|
+
Use <code>routing</code> only to hit a particular shard.</p>
|
|
6086
|
+
|
|
6087
|
+
|
|
6088
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-termvectors.html>`_
|
|
6089
|
+
|
|
6090
|
+
:param index: The name of the index that contains the document.
|
|
6091
|
+
:param id: A unique identifier for the document.
|
|
4926
6092
|
:param doc: An artificial document (a document not present in the index) for
|
|
4927
6093
|
which you want to retrieve term vectors.
|
|
4928
|
-
:param field_statistics: If `true`, the response includes
|
|
4929
|
-
|
|
4930
|
-
|
|
4931
|
-
|
|
4932
|
-
|
|
4933
|
-
:param
|
|
6094
|
+
:param field_statistics: If `true`, the response includes: * The document count
|
|
6095
|
+
(how many documents contain this field). * The sum of document frequencies
|
|
6096
|
+
(the sum of document frequencies for all terms in this field). * The sum
|
|
6097
|
+
of total term frequencies (the sum of total term frequencies of each term
|
|
6098
|
+
in this field).
|
|
6099
|
+
:param fields: A list of fields to include in the statistics. It is used as the
|
|
6100
|
+
default list unless a specific field list is provided in the `completion_fields`
|
|
6101
|
+
or `fielddata_fields` parameters.
|
|
6102
|
+
:param filter: Filter terms based on their tf-idf scores. This could be useful
|
|
6103
|
+
in order to find out a good characteristic vector of a document. This feature
|
|
6104
|
+
works in a similar manner to the second phase of the More Like This Query.
|
|
4934
6105
|
:param offsets: If `true`, the response includes term offsets.
|
|
4935
6106
|
:param payloads: If `true`, the response includes term payloads.
|
|
4936
|
-
:param per_field_analyzer:
|
|
6107
|
+
:param per_field_analyzer: Override the default per-field analyzer. This is useful
|
|
6108
|
+
in order to generate term vectors in any fashion, especially when using artificial
|
|
6109
|
+
documents. When providing an analyzer for a field that already stores term
|
|
6110
|
+
vectors, the term vectors will be regenerated.
|
|
4937
6111
|
:param positions: If `true`, the response includes term positions.
|
|
4938
|
-
:param preference:
|
|
4939
|
-
|
|
6112
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
6113
|
+
is random by default.
|
|
4940
6114
|
:param realtime: If true, the request is real-time as opposed to near-real-time.
|
|
4941
|
-
:param routing:
|
|
4942
|
-
|
|
4943
|
-
|
|
6115
|
+
:param routing: A custom value that is used to route operations to a specific
|
|
6116
|
+
shard.
|
|
6117
|
+
:param term_statistics: If `true`, the response includes: * The total term frequency
|
|
6118
|
+
(how often a term occurs in all documents). * The document frequency (the
|
|
6119
|
+
number of documents containing the current term). By default these values
|
|
6120
|
+
are not returned since term statistics can have a serious performance impact.
|
|
4944
6121
|
:param version: If `true`, returns the document version as part of a hit.
|
|
4945
|
-
:param version_type:
|
|
6122
|
+
:param version_type: The version type.
|
|
4946
6123
|
"""
|
|
4947
6124
|
if index in SKIP_IN_PATH:
|
|
4948
6125
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -4959,41 +6136,41 @@ class AsyncElasticsearch(BaseClient):
|
|
|
4959
6136
|
__body: t.Dict[str, t.Any] = body if body is not None else {}
|
|
4960
6137
|
if error_trace is not None:
|
|
4961
6138
|
__query["error_trace"] = error_trace
|
|
4962
|
-
if field_statistics is not None:
|
|
4963
|
-
__query["field_statistics"] = field_statistics
|
|
4964
|
-
if fields is not None:
|
|
4965
|
-
__query["fields"] = fields
|
|
4966
6139
|
if filter_path is not None:
|
|
4967
6140
|
__query["filter_path"] = filter_path
|
|
4968
6141
|
if human is not None:
|
|
4969
6142
|
__query["human"] = human
|
|
4970
|
-
if offsets is not None:
|
|
4971
|
-
__query["offsets"] = offsets
|
|
4972
|
-
if payloads is not None:
|
|
4973
|
-
__query["payloads"] = payloads
|
|
4974
|
-
if positions is not None:
|
|
4975
|
-
__query["positions"] = positions
|
|
4976
6143
|
if preference is not None:
|
|
4977
6144
|
__query["preference"] = preference
|
|
4978
6145
|
if pretty is not None:
|
|
4979
6146
|
__query["pretty"] = pretty
|
|
4980
6147
|
if realtime is not None:
|
|
4981
6148
|
__query["realtime"] = realtime
|
|
4982
|
-
if routing is not None:
|
|
4983
|
-
__query["routing"] = routing
|
|
4984
|
-
if term_statistics is not None:
|
|
4985
|
-
__query["term_statistics"] = term_statistics
|
|
4986
|
-
if version is not None:
|
|
4987
|
-
__query["version"] = version
|
|
4988
|
-
if version_type is not None:
|
|
4989
|
-
__query["version_type"] = version_type
|
|
4990
6149
|
if not __body:
|
|
4991
6150
|
if doc is not None:
|
|
4992
6151
|
__body["doc"] = doc
|
|
6152
|
+
if field_statistics is not None:
|
|
6153
|
+
__body["field_statistics"] = field_statistics
|
|
6154
|
+
if fields is not None:
|
|
6155
|
+
__body["fields"] = fields
|
|
4993
6156
|
if filter is not None:
|
|
4994
6157
|
__body["filter"] = filter
|
|
6158
|
+
if offsets is not None:
|
|
6159
|
+
__body["offsets"] = offsets
|
|
6160
|
+
if payloads is not None:
|
|
6161
|
+
__body["payloads"] = payloads
|
|
4995
6162
|
if per_field_analyzer is not None:
|
|
4996
6163
|
__body["per_field_analyzer"] = per_field_analyzer
|
|
6164
|
+
if positions is not None:
|
|
6165
|
+
__body["positions"] = positions
|
|
6166
|
+
if routing is not None:
|
|
6167
|
+
__body["routing"] = routing
|
|
6168
|
+
if term_statistics is not None:
|
|
6169
|
+
__body["term_statistics"] = term_statistics
|
|
6170
|
+
if version is not None:
|
|
6171
|
+
__body["version"] = version
|
|
6172
|
+
if version_type is not None:
|
|
6173
|
+
__body["version_type"] = version_type
|
|
4997
6174
|
if not __body:
|
|
4998
6175
|
__body = None # type: ignore[assignment]
|
|
4999
6176
|
__headers = {"accept": "application/json"}
|
|
@@ -5038,6 +6215,7 @@ class AsyncElasticsearch(BaseClient):
|
|
|
5038
6215
|
human: t.Optional[bool] = None,
|
|
5039
6216
|
if_primary_term: t.Optional[int] = None,
|
|
5040
6217
|
if_seq_no: t.Optional[int] = None,
|
|
6218
|
+
include_source_on_error: t.Optional[bool] = None,
|
|
5041
6219
|
lang: t.Optional[str] = None,
|
|
5042
6220
|
pretty: t.Optional[bool] = None,
|
|
5043
6221
|
refresh: t.Optional[
|
|
@@ -5059,46 +6237,67 @@ class AsyncElasticsearch(BaseClient):
|
|
|
5059
6237
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
5060
6238
|
) -> ObjectApiResponse[t.Any]:
|
|
5061
6239
|
"""
|
|
5062
|
-
|
|
5063
|
-
|
|
5064
|
-
|
|
5065
|
-
|
|
5066
|
-
|
|
5067
|
-
|
|
5068
|
-
|
|
5069
|
-
|
|
5070
|
-
|
|
5071
|
-
|
|
5072
|
-
|
|
5073
|
-
|
|
6240
|
+
.. raw:: html
|
|
6241
|
+
|
|
6242
|
+
<p>Update a document.</p>
|
|
6243
|
+
<p>Update a document by running a script or passing a partial document.</p>
|
|
6244
|
+
<p>If the Elasticsearch security features are enabled, you must have the <code>index</code> or <code>write</code> index privilege for the target index or index alias.</p>
|
|
6245
|
+
<p>The script can update, delete, or skip modifying the document.
|
|
6246
|
+
The API also supports passing a partial document, which is merged into the existing document.
|
|
6247
|
+
To fully replace an existing document, use the index API.
|
|
6248
|
+
This operation:</p>
|
|
6249
|
+
<ul>
|
|
6250
|
+
<li>Gets the document (collocated with the shard) from the index.</li>
|
|
6251
|
+
<li>Runs the specified script.</li>
|
|
6252
|
+
<li>Indexes the result.</li>
|
|
6253
|
+
</ul>
|
|
6254
|
+
<p>The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.</p>
|
|
6255
|
+
<p>The <code>_source</code> field must be enabled to use this API.
|
|
6256
|
+
In addition to <code>_source</code>, you can access the following variables through the <code>ctx</code> map: <code>_index</code>, <code>_type</code>, <code>_id</code>, <code>_version</code>, <code>_routing</code>, and <code>_now</code> (the current timestamp).</p>
|
|
6257
|
+
|
|
6258
|
+
|
|
6259
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-update.html>`_
|
|
6260
|
+
|
|
6261
|
+
:param index: The name of the target index. By default, the index is created
|
|
6262
|
+
automatically if it doesn't exist.
|
|
6263
|
+
:param id: A unique identifier for the document to be updated.
|
|
6264
|
+
:param detect_noop: If `true`, the `result` in the response is set to `noop`
|
|
6265
|
+
(no operation) when there are no changes to the document.
|
|
6266
|
+
:param doc: A partial update to an existing document. If both `doc` and `script`
|
|
6267
|
+
are specified, `doc` is ignored.
|
|
6268
|
+
:param doc_as_upsert: If `true`, use the contents of 'doc' as the value of 'upsert'.
|
|
6269
|
+
NOTE: Using ingest pipelines with `doc_as_upsert` is not supported.
|
|
5074
6270
|
:param if_primary_term: Only perform the operation if the document has this primary
|
|
5075
6271
|
term.
|
|
5076
6272
|
:param if_seq_no: Only perform the operation if the document has this sequence
|
|
5077
6273
|
number.
|
|
6274
|
+
:param include_source_on_error: Whether to include the document source
|
|
6275
|
+
in the error message in case of parsing errors.
|
|
5078
6276
|
:param lang: The script language.
|
|
5079
6277
|
:param refresh: If 'true', Elasticsearch refreshes the affected shards to make
|
|
5080
|
-
this operation visible to search
|
|
5081
|
-
make this operation visible to search
|
|
5082
|
-
:param require_alias: If true
|
|
5083
|
-
:param retry_on_conflict:
|
|
6278
|
+
this operation visible to search. If 'wait_for', it waits for a refresh to
|
|
6279
|
+
make this operation visible to search. If 'false', it does nothing with refreshes.
|
|
6280
|
+
:param require_alias: If `true`, the destination must be an index alias.
|
|
6281
|
+
:param retry_on_conflict: The number of times the operation should be retried
|
|
5084
6282
|
when a conflict occurs.
|
|
5085
|
-
:param routing:
|
|
5086
|
-
:param script:
|
|
5087
|
-
:param scripted_upsert:
|
|
5088
|
-
|
|
5089
|
-
:param source:
|
|
5090
|
-
|
|
5091
|
-
:param source_excludes:
|
|
5092
|
-
:param source_includes:
|
|
5093
|
-
:param timeout:
|
|
5094
|
-
|
|
5095
|
-
The actual wait time could be longer, particularly
|
|
6283
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
6284
|
+
:param script: The script to run to update the document.
|
|
6285
|
+
:param scripted_upsert: If `true`, run the script whether or not the document
|
|
6286
|
+
exists.
|
|
6287
|
+
:param source: If `false`, turn off source retrieval. You can also specify a
|
|
6288
|
+
comma-separated list of the fields you want to retrieve.
|
|
6289
|
+
:param source_excludes: The source fields you want to exclude.
|
|
6290
|
+
:param source_includes: The source fields you want to retrieve.
|
|
6291
|
+
:param timeout: The period to wait for the following operations: dynamic mapping
|
|
6292
|
+
updates and waiting for active shards. Elasticsearch waits for at least the
|
|
6293
|
+
timeout period before failing. The actual wait time could be longer, particularly
|
|
6294
|
+
when multiple waits occur.
|
|
5096
6295
|
:param upsert: If the document does not already exist, the contents of 'upsert'
|
|
5097
|
-
are inserted as a new document. If the document exists, the 'script' is
|
|
5098
|
-
:param wait_for_active_shards: The number of shard
|
|
5099
|
-
before proceeding with the
|
|
5100
|
-
up to the total number of shards in the index (number_of_replicas
|
|
5101
|
-
|
|
6296
|
+
are inserted as a new document. If the document exists, the 'script' is run.
|
|
6297
|
+
:param wait_for_active_shards: The number of copies of each shard that must be
|
|
6298
|
+
active before proceeding with the operation. Set to 'all' or any positive
|
|
6299
|
+
integer up to the total number of shards in the index (`number_of_replicas`+1).
|
|
6300
|
+
The default value of `1` means it waits for each primary shard to be active.
|
|
5102
6301
|
"""
|
|
5103
6302
|
if index in SKIP_IN_PATH:
|
|
5104
6303
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -5118,6 +6317,8 @@ class AsyncElasticsearch(BaseClient):
|
|
|
5118
6317
|
__query["if_primary_term"] = if_primary_term
|
|
5119
6318
|
if if_seq_no is not None:
|
|
5120
6319
|
__query["if_seq_no"] = if_seq_no
|
|
6320
|
+
if include_source_on_error is not None:
|
|
6321
|
+
__query["include_source_on_error"] = include_source_on_error
|
|
5121
6322
|
if lang is not None:
|
|
5122
6323
|
__query["lang"] = lang
|
|
5123
6324
|
if pretty is not None:
|
|
@@ -5224,82 +6425,166 @@ class AsyncElasticsearch(BaseClient):
|
|
|
5224
6425
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
5225
6426
|
) -> ObjectApiResponse[t.Any]:
|
|
5226
6427
|
"""
|
|
5227
|
-
|
|
5228
|
-
|
|
5229
|
-
|
|
5230
|
-
|
|
5231
|
-
|
|
6428
|
+
.. raw:: html
|
|
6429
|
+
|
|
6430
|
+
<p>Update documents.
|
|
6431
|
+
Updates documents that match the specified query.
|
|
6432
|
+
If no query is specified, performs an update on every document in the data stream or index without modifying the source, which is useful for picking up mapping changes.</p>
|
|
6433
|
+
<p>If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:</p>
|
|
6434
|
+
<ul>
|
|
6435
|
+
<li><code>read</code></li>
|
|
6436
|
+
<li><code>index</code> or <code>write</code></li>
|
|
6437
|
+
</ul>
|
|
6438
|
+
<p>You can specify the query criteria in the request URI or the request body using the same syntax as the search API.</p>
|
|
6439
|
+
<p>When you submit an update by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and updates matching documents using internal versioning.
|
|
6440
|
+
When the versions match, the document is updated and the version number is incremented.
|
|
6441
|
+
If a document changes between the time that the snapshot is taken and the update operation is processed, it results in a version conflict and the operation fails.
|
|
6442
|
+
You can opt to count version conflicts instead of halting and returning by setting <code>conflicts</code> to <code>proceed</code>.
|
|
6443
|
+
Note that if you opt to count version conflicts, the operation could attempt to update more documents from the source than <code>max_docs</code> until it has successfully updated <code>max_docs</code> documents or it has gone through every document in the source query.</p>
|
|
6444
|
+
<p>NOTE: Documents with a version equal to 0 cannot be updated using update by query because internal versioning does not support 0 as a valid version number.</p>
|
|
6445
|
+
<p>While processing an update by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents.
|
|
6446
|
+
A bulk update request is performed for each batch of matching documents.
|
|
6447
|
+
Any query or update failures cause the update by query request to fail and the failures are shown in the response.
|
|
6448
|
+
Any update requests that completed successfully still stick; they are not rolled back.</p>
|
|
6449
|
+
<p><strong>Throttling update requests</strong></p>
|
|
6450
|
+
<p>To control the rate at which update by query issues batches of update operations, you can set <code>requests_per_second</code> to any positive decimal number.
|
|
6451
|
+
This pads each batch with a wait time to throttle the rate.
|
|
6452
|
+
Set <code>requests_per_second</code> to <code>-1</code> to turn off throttling.</p>
|
|
6453
|
+
<p>Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
|
|
6454
|
+
The padding time is the difference between the batch size divided by the <code>requests_per_second</code> and the time spent writing.
|
|
6455
|
+
By default the batch size is 1000, so if <code>requests_per_second</code> is set to <code>500</code>:</p>
|
|
6456
|
+
<pre><code>target_time = 1000 / 500 per second = 2 seconds
|
|
6457
|
+
wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
|
|
6458
|
+
</code></pre>
|
|
6459
|
+
<p>Since the batch is issued as a single _bulk request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set.
|
|
6460
|
+
This is "bursty" instead of "smooth".</p>
|
|
6461
|
+
<p><strong>Slicing</strong></p>
|
|
6462
|
+
<p>Update by query supports sliced scroll to parallelize the update process.
|
|
6463
|
+
This can improve efficiency and provide a convenient way to break the request down into smaller parts.</p>
|
|
6464
|
+
<p>Setting <code>slices</code> to <code>auto</code> chooses a reasonable number for most data streams and indices.
|
|
6465
|
+
This setting will use one slice per shard, up to a certain limit.
|
|
6466
|
+
If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.</p>
|
|
6467
|
+
<p>Adding <code>slices</code> to <code>_update_by_query</code> just automates the manual process of creating sub-requests, which means it has some quirks:</p>
|
|
6468
|
+
<ul>
|
|
6469
|
+
<li>You can see these requests in the tasks APIs. These sub-requests are "child" tasks of the task for the request with slices.</li>
|
|
6470
|
+
<li>Fetching the status of the task for the request with <code>slices</code> only contains the status of completed slices.</li>
|
|
6471
|
+
<li>These sub-requests are individually addressable for things like cancellation and rethrottling.</li>
|
|
6472
|
+
<li>Rethrottling the request with <code>slices</code> will rethrottle the unfinished sub-request proportionally.</li>
|
|
6473
|
+
<li>Canceling the request with slices will cancel each sub-request.</li>
|
|
6474
|
+
<li>Due to the nature of slices each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.</li>
|
|
6475
|
+
<li>Parameters like <code>requests_per_second</code> and <code>max_docs</code> on a request with slices are distributed proportionally to each sub-request. Combine that with the point above about distribution being uneven and you should conclude that using <code>max_docs</code> with <code>slices</code> might not result in exactly <code>max_docs</code> documents being updated.</li>
|
|
6476
|
+
<li>Each sub-request gets a slightly different snapshot of the source data stream or index though these are all taken at approximately the same time.</li>
|
|
6477
|
+
</ul>
|
|
6478
|
+
<p>If you're slicing manually or otherwise tuning automatic slicing, keep in mind that:</p>
|
|
6479
|
+
<ul>
|
|
6480
|
+
<li>Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many slices hurts performance. Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.</li>
|
|
6481
|
+
<li>Update performance scales linearly across available resources with the number of slices.</li>
|
|
6482
|
+
</ul>
|
|
6483
|
+
<p>Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.</p>
|
|
6484
|
+
<p><strong>Update the document source</strong></p>
|
|
6485
|
+
<p>Update by query supports scripts to update the document source.
|
|
6486
|
+
As with the update API, you can set <code>ctx.op</code> to change the operation that is performed.</p>
|
|
6487
|
+
<p>Set <code>ctx.op = "noop"</code> if your script decides that it doesn't have to make any changes.
|
|
6488
|
+
The update by query operation skips updating the document and increments the <code>noop</code> counter.</p>
|
|
6489
|
+
<p>Set <code>ctx.op = "delete"</code> if your script decides that the document should be deleted.
|
|
6490
|
+
The update by query operation deletes the document and increments the <code>deleted</code> counter.</p>
|
|
6491
|
+
<p>Update by query supports only <code>index</code>, <code>noop</code>, and <code>delete</code>.
|
|
6492
|
+
Setting <code>ctx.op</code> to anything else is an error.
|
|
6493
|
+
Setting any other field in <code>ctx</code> is an error.
|
|
6494
|
+
This API enables you to only modify the source of matching documents; you cannot move them.</p>
|
|
6495
|
+
|
|
6496
|
+
|
|
6497
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-update-by-query.html>`_
|
|
5232
6498
|
|
|
5233
|
-
:param index:
|
|
5234
|
-
|
|
5235
|
-
parameter or use `*` or `_all`.
|
|
6499
|
+
:param index: A comma-separated list of data streams, indices, and aliases to
|
|
6500
|
+
search. It supports wildcards (`*`). To search all data streams or indices,
|
|
6501
|
+
omit this parameter or use `*` or `_all`.
|
|
5236
6502
|
:param allow_no_indices: If `false`, the request returns an error if any wildcard
|
|
5237
6503
|
expression, index alias, or `_all` value targets only missing or closed indices.
|
|
5238
6504
|
This behavior applies even if the request targets other open indices. For
|
|
5239
6505
|
example, a request targeting `foo*,bar*` returns an error if an index starts
|
|
5240
6506
|
with `foo` but no index starts with `bar`.
|
|
5241
6507
|
:param analyze_wildcard: If `true`, wildcard and prefix queries are analyzed.
|
|
5242
|
-
|
|
5243
|
-
:param
|
|
5244
|
-
|
|
6508
|
+
This parameter can be used only when the `q` query string parameter is specified.
|
|
6509
|
+
:param analyzer: The analyzer to use for the query string. This parameter can
|
|
6510
|
+
be used only when the `q` query string parameter is specified.
|
|
6511
|
+
:param conflicts: The preferred behavior when update by query hits version conflicts:
|
|
6512
|
+
`abort` or `proceed`.
|
|
5245
6513
|
:param default_operator: The default operator for query string query: `AND` or
|
|
5246
|
-
`OR`.
|
|
5247
|
-
|
|
5248
|
-
|
|
5249
|
-
|
|
5250
|
-
|
|
5251
|
-
|
|
5252
|
-
|
|
5253
|
-
|
|
6514
|
+
`OR`. This parameter can be used only when the `q` query string parameter
|
|
6515
|
+
is specified.
|
|
6516
|
+
:param df: The field to use as default where no field prefix is given in the
|
|
6517
|
+
query string. This parameter can be used only when the `q` query string parameter
|
|
6518
|
+
is specified.
|
|
6519
|
+
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
6520
|
+
If the request can target data streams, this argument determines whether
|
|
6521
|
+
wildcard expressions match hidden data streams. It supports comma-separated
|
|
6522
|
+
values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`,
|
|
6523
|
+
`hidden`, `none`.
|
|
6524
|
+
:param from_: Skips the specified number of documents.
|
|
5254
6525
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
5255
6526
|
a missing or closed index.
|
|
5256
6527
|
:param lenient: If `true`, format-based query failures (such as providing text
|
|
5257
|
-
to a numeric field) in the query string will be ignored.
|
|
6528
|
+
to a numeric field) in the query string will be ignored. This parameter can
|
|
6529
|
+
be used only when the `q` query string parameter is specified.
|
|
5258
6530
|
:param max_docs: The maximum number of documents to update.
|
|
5259
|
-
:param pipeline: ID of the pipeline to use to preprocess incoming documents.
|
|
6531
|
+
:param pipeline: The ID of the pipeline to use to preprocess incoming documents.
|
|
5260
6532
|
If the index has a default ingest pipeline specified, then setting the value
|
|
5261
6533
|
to `_none` disables the default ingest pipeline for this request. If a final
|
|
5262
6534
|
pipeline is configured it will always run, regardless of the value of this
|
|
5263
6535
|
parameter.
|
|
5264
|
-
:param preference:
|
|
5265
|
-
|
|
5266
|
-
:param q:
|
|
5267
|
-
:param query:
|
|
6536
|
+
:param preference: The node or shard the operation should be performed on. It
|
|
6537
|
+
is random by default.
|
|
6538
|
+
:param q: A query in the Lucene query string syntax.
|
|
6539
|
+
:param query: The documents to update using the Query DSL.
|
|
5268
6540
|
:param refresh: If `true`, Elasticsearch refreshes affected shards to make the
|
|
5269
|
-
operation visible to search.
|
|
6541
|
+
operation visible to search after the request completes. This is different
|
|
6542
|
+
than the update API's `refresh` parameter, which causes just the shard that
|
|
6543
|
+
received the request to be refreshed.
|
|
5270
6544
|
:param request_cache: If `true`, the request cache is used for this request.
|
|
6545
|
+
It defaults to the index-level setting.
|
|
5271
6546
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
5272
6547
|
second.
|
|
5273
|
-
:param routing:
|
|
6548
|
+
:param routing: A custom value used to route operations to a specific shard.
|
|
5274
6549
|
:param script: The script to run to update the document source or metadata when
|
|
5275
6550
|
updating.
|
|
5276
|
-
:param scroll:
|
|
5277
|
-
:param scroll_size:
|
|
5278
|
-
:param search_timeout:
|
|
5279
|
-
|
|
5280
|
-
|
|
6551
|
+
:param scroll: The period to retain the search context for scrolling.
|
|
6552
|
+
:param scroll_size: The size of the scroll request that powers the operation.
|
|
6553
|
+
:param search_timeout: An explicit timeout for each search request. By default,
|
|
6554
|
+
there is no timeout.
|
|
6555
|
+
:param search_type: The type of the search operation. Available options include
|
|
6556
|
+
`query_then_fetch` and `dfs_query_then_fetch`.
|
|
5281
6557
|
:param slice: Slice the request manually using the provided slice ID and total
|
|
5282
6558
|
number of slices.
|
|
5283
6559
|
:param slices: The number of slices this task should be divided into.
|
|
5284
6560
|
:param sort: A comma-separated list of <field>:<direction> pairs.
|
|
5285
|
-
:param stats:
|
|
5286
|
-
:param terminate_after:
|
|
6561
|
+
:param stats: The specific `tag` of the request for logging and statistical purposes.
|
|
6562
|
+
:param terminate_after: The maximum number of documents to collect for each shard.
|
|
5287
6563
|
If a query reaches this limit, Elasticsearch terminates the query early.
|
|
5288
|
-
Elasticsearch collects documents before sorting. Use with caution.
|
|
5289
|
-
applies this parameter to each shard handling the request.
|
|
5290
|
-
let Elasticsearch perform early termination automatically.
|
|
5291
|
-
this parameter for requests that target data streams with
|
|
5292
|
-
across multiple data tiers.
|
|
5293
|
-
:param timeout:
|
|
5294
|
-
dynamic mapping updates, waiting for active shards.
|
|
6564
|
+
Elasticsearch collects documents before sorting. IMPORTANT: Use with caution.
|
|
6565
|
+
Elasticsearch applies this parameter to each shard handling the request.
|
|
6566
|
+
When possible, let Elasticsearch perform early termination automatically.
|
|
6567
|
+
Avoid specifying this parameter for requests that target data streams with
|
|
6568
|
+
backing indices across multiple data tiers.
|
|
6569
|
+
:param timeout: The period each update request waits for the following operations:
|
|
6570
|
+
dynamic mapping updates, waiting for active shards. By default, it is one
|
|
6571
|
+
minute. This guarantees Elasticsearch waits for at least the timeout before
|
|
6572
|
+
failing. The actual wait time could be longer, particularly when multiple
|
|
6573
|
+
waits occur.
|
|
5295
6574
|
:param version: If `true`, returns the document version as part of a hit.
|
|
5296
6575
|
:param version_type: Should the document increment the version number (internal)
|
|
5297
6576
|
on hit or not (reindex)
|
|
5298
6577
|
:param wait_for_active_shards: The number of shard copies that must be active
|
|
5299
6578
|
before proceeding with the operation. Set to `all` or any positive integer
|
|
5300
|
-
up to the total number of shards in the index (`number_of_replicas+1`).
|
|
6579
|
+
up to the total number of shards in the index (`number_of_replicas+1`). The
|
|
6580
|
+
`timeout` parameter controls how long each write request waits for unavailable
|
|
6581
|
+
shards to become available. Both work exactly the way they work in the bulk
|
|
6582
|
+
API.
|
|
5301
6583
|
:param wait_for_completion: If `true`, the request blocks until the operation
|
|
5302
|
-
is complete.
|
|
6584
|
+
is complete. If `false`, Elasticsearch performs some preflight checks, launches
|
|
6585
|
+
the request, and returns a task ID that you can use to cancel or get the
|
|
6586
|
+
status of the task. Elasticsearch creates a record of this task as a document
|
|
6587
|
+
at `.tasks/task/${taskId}`.
|
|
5303
6588
|
"""
|
|
5304
6589
|
if index in SKIP_IN_PATH:
|
|
5305
6590
|
raise ValueError("Empty value passed for parameter 'index'")
|
|
@@ -5422,16 +6707,18 @@ class AsyncElasticsearch(BaseClient):
|
|
|
5422
6707
|
requests_per_second: t.Optional[float] = None,
|
|
5423
6708
|
) -> ObjectApiResponse[t.Any]:
|
|
5424
6709
|
"""
|
|
5425
|
-
|
|
5426
|
-
for a particular update by query operation. Rethrottling that speeds up the query
|
|
5427
|
-
takes effect immediately but rethrotting that slows down the query takes effect
|
|
5428
|
-
after completing the current batch to prevent scroll timeouts.
|
|
6710
|
+
.. raw:: html
|
|
5429
6711
|
|
|
5430
|
-
|
|
6712
|
+
<p>Throttle an update by query operation.</p>
|
|
6713
|
+
<p>Change the number of requests per second for a particular update by query operation.
|
|
6714
|
+
Rethrottling that speeds up the query takes effect immediately but rethrottling that slows down the query takes effect after completing the current batch to prevent scroll timeouts.</p>
|
|
6715
|
+
|
|
6716
|
+
|
|
6717
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.18/docs-update-by-query.html#docs-update-by-query-rethrottle>`_
|
|
5431
6718
|
|
|
5432
6719
|
:param task_id: The ID for the task.
|
|
5433
6720
|
:param requests_per_second: The throttle for this request in sub-requests per
|
|
5434
|
-
second.
|
|
6721
|
+
second. To turn off throttling, set it to `-1`.
|
|
5435
6722
|
"""
|
|
5436
6723
|
if task_id in SKIP_IN_PATH:
|
|
5437
6724
|
raise ValueError("Empty value passed for parameter 'task_id'")
|