elasticsearch 9.0.2__py3-none-any.whl → 9.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/_async/client/__init__.py +59 -202
- elasticsearch/_async/client/cat.py +1011 -59
- elasticsearch/_async/client/cluster.py +14 -4
- elasticsearch/_async/client/eql.py +10 -2
- elasticsearch/_async/client/esql.py +33 -10
- elasticsearch/_async/client/indices.py +88 -44
- elasticsearch/_async/client/inference.py +108 -3
- elasticsearch/_async/client/ingest.py +0 -7
- elasticsearch/_async/client/license.py +4 -4
- elasticsearch/_async/client/ml.py +6 -17
- elasticsearch/_async/client/monitoring.py +1 -1
- elasticsearch/_async/client/rollup.py +1 -22
- elasticsearch/_async/client/security.py +11 -17
- elasticsearch/_async/client/snapshot.py +6 -0
- elasticsearch/_async/client/sql.py +1 -1
- elasticsearch/_async/client/synonyms.py +1 -0
- elasticsearch/_async/client/transform.py +60 -0
- elasticsearch/_async/client/watcher.py +4 -2
- elasticsearch/_sync/client/__init__.py +59 -202
- elasticsearch/_sync/client/cat.py +1011 -59
- elasticsearch/_sync/client/cluster.py +14 -4
- elasticsearch/_sync/client/eql.py +10 -2
- elasticsearch/_sync/client/esql.py +33 -10
- elasticsearch/_sync/client/indices.py +88 -44
- elasticsearch/_sync/client/inference.py +108 -3
- elasticsearch/_sync/client/ingest.py +0 -7
- elasticsearch/_sync/client/license.py +4 -4
- elasticsearch/_sync/client/ml.py +6 -17
- elasticsearch/_sync/client/monitoring.py +1 -1
- elasticsearch/_sync/client/rollup.py +1 -22
- elasticsearch/_sync/client/security.py +11 -17
- elasticsearch/_sync/client/snapshot.py +6 -0
- elasticsearch/_sync/client/sql.py +1 -1
- elasticsearch/_sync/client/synonyms.py +1 -0
- elasticsearch/_sync/client/transform.py +60 -0
- elasticsearch/_sync/client/watcher.py +4 -2
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +84 -0
- elasticsearch/dsl/_sync/document.py +84 -0
- elasticsearch/dsl/document_base.py +219 -16
- elasticsearch/dsl/field.py +245 -57
- elasticsearch/dsl/query.py +7 -4
- elasticsearch/dsl/response/aggs.py +1 -1
- elasticsearch/dsl/types.py +125 -88
- elasticsearch/dsl/utils.py +2 -2
- elasticsearch/{dsl/_sync/_sync_check → esql}/__init__.py +3 -0
- elasticsearch/esql/esql.py +1156 -0
- elasticsearch/esql/functions.py +1750 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.0.4.dist-info}/METADATA +1 -3
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.0.4.dist-info}/RECORD +55 -59
- elasticsearch/dsl/_sync/_sync_check/document.py +0 -514
- elasticsearch/dsl/_sync/_sync_check/faceted_search.py +0 -50
- elasticsearch/dsl/_sync/_sync_check/index.py +0 -597
- elasticsearch/dsl/_sync/_sync_check/mapping.py +0 -49
- elasticsearch/dsl/_sync/_sync_check/search.py +0 -230
- elasticsearch/dsl/_sync/_sync_check/update_by_query.py +0 -45
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.0.4.dist-info}/WHEEL +0 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.0.4.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.0.4.dist-info}/licenses/NOTICE +0 -0
|
@@ -606,6 +606,7 @@ class Elasticsearch(BaseClient):
|
|
|
606
606
|
<li>JavaScript: Check out <code>client.helpers.*</code></li>
|
|
607
607
|
<li>.NET: Check out <code>BulkAllObservable</code></li>
|
|
608
608
|
<li>PHP: Check out bulk indexing.</li>
|
|
609
|
+
<li>Ruby: Check out <code>Elasticsearch::Helpers::BulkHelper</code></li>
|
|
609
610
|
</ul>
|
|
610
611
|
<p><strong>Submitting bulk requests with cURL</strong></p>
|
|
611
612
|
<p>If you're providing text file input to <code>curl</code>, you must use the <code>--data-binary</code> flag instead of plain <code>-d</code>.
|
|
@@ -635,6 +636,8 @@ class Elasticsearch(BaseClient):
|
|
|
635
636
|
Imagine a <code>_bulk?refresh=wait_for</code> request with three documents in it that happen to be routed to different shards in an index with five shards.
|
|
636
637
|
The request will only wait for those three shards to refresh.
|
|
637
638
|
The other two shards that make up the index do not participate in the <code>_bulk</code> request at all.</p>
|
|
639
|
+
<p>You might want to disable the refresh interval temporarily to improve indexing throughput for large bulk requests.
|
|
640
|
+
Refer to the linked documentation for step-by-step instructions using the index settings API.</p>
|
|
638
641
|
|
|
639
642
|
|
|
640
643
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-bulk>`_
|
|
@@ -1027,10 +1030,7 @@ class Elasticsearch(BaseClient):
|
|
|
1027
1030
|
error_trace: t.Optional[bool] = None,
|
|
1028
1031
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1029
1032
|
human: t.Optional[bool] = None,
|
|
1030
|
-
if_primary_term: t.Optional[int] = None,
|
|
1031
|
-
if_seq_no: t.Optional[int] = None,
|
|
1032
1033
|
include_source_on_error: t.Optional[bool] = None,
|
|
1033
|
-
op_type: t.Optional[t.Union[str, t.Literal["create", "index"]]] = None,
|
|
1034
1034
|
pipeline: t.Optional[str] = None,
|
|
1035
1035
|
pretty: t.Optional[bool] = None,
|
|
1036
1036
|
refresh: t.Optional[
|
|
@@ -1115,18 +1115,8 @@ class Elasticsearch(BaseClient):
|
|
|
1115
1115
|
:param id: A unique identifier for the document. To automatically generate a
|
|
1116
1116
|
document ID, use the `POST /<target>/_doc/` request format.
|
|
1117
1117
|
:param document:
|
|
1118
|
-
:param if_primary_term: Only perform the operation if the document has this primary
|
|
1119
|
-
term.
|
|
1120
|
-
:param if_seq_no: Only perform the operation if the document has this sequence
|
|
1121
|
-
number.
|
|
1122
1118
|
:param include_source_on_error: Whether to include the document source
|
|
1123
1119
|
in the error message in case of parsing errors.
|
|
1124
|
-
:param op_type: Set to `create` to only index the document if it does not already
|
|
1125
|
-
exist (put if absent). If a document with the specified `_id` already exists,
|
|
1126
|
-
the indexing operation will fail. The behavior is the same as using the `<index>/_create`
|
|
1127
|
-
endpoint. If a document ID is specified, this parameter defaults to `index`.
|
|
1128
|
-
Otherwise, it defaults to `create`. If the request targets a data stream,
|
|
1129
|
-
an `op_type` of `create` is required.
|
|
1130
1120
|
:param pipeline: The ID of the pipeline to use to preprocess incoming documents.
|
|
1131
1121
|
If the index has a default ingest pipeline specified, setting the value to
|
|
1132
1122
|
`_none` turns off the default ingest pipeline for this request. If a final
|
|
@@ -1178,14 +1168,8 @@ class Elasticsearch(BaseClient):
|
|
|
1178
1168
|
__query["filter_path"] = filter_path
|
|
1179
1169
|
if human is not None:
|
|
1180
1170
|
__query["human"] = human
|
|
1181
|
-
if if_primary_term is not None:
|
|
1182
|
-
__query["if_primary_term"] = if_primary_term
|
|
1183
|
-
if if_seq_no is not None:
|
|
1184
|
-
__query["if_seq_no"] = if_seq_no
|
|
1185
1171
|
if include_source_on_error is not None:
|
|
1186
1172
|
__query["include_source_on_error"] = include_source_on_error
|
|
1187
|
-
if op_type is not None:
|
|
1188
|
-
__query["op_type"] = op_type
|
|
1189
1173
|
if pipeline is not None:
|
|
1190
1174
|
__query["pipeline"] = pipeline
|
|
1191
1175
|
if pretty is not None:
|
|
@@ -1341,7 +1325,7 @@ class Elasticsearch(BaseClient):
|
|
|
1341
1325
|
)
|
|
1342
1326
|
|
|
1343
1327
|
@_rewrite_parameters(
|
|
1344
|
-
body_fields=("max_docs", "query", "slice"),
|
|
1328
|
+
body_fields=("max_docs", "query", "slice", "sort"),
|
|
1345
1329
|
parameter_aliases={"from": "from_"},
|
|
1346
1330
|
)
|
|
1347
1331
|
def delete_by_query(
|
|
@@ -1385,7 +1369,12 @@ class Elasticsearch(BaseClient):
|
|
|
1385
1369
|
] = None,
|
|
1386
1370
|
slice: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1387
1371
|
slices: t.Optional[t.Union[int, t.Union[str, t.Literal["auto"]]]] = None,
|
|
1388
|
-
sort: t.Optional[
|
|
1372
|
+
sort: t.Optional[
|
|
1373
|
+
t.Union[
|
|
1374
|
+
t.Sequence[t.Union[str, t.Mapping[str, t.Any]]],
|
|
1375
|
+
t.Union[str, t.Mapping[str, t.Any]],
|
|
1376
|
+
]
|
|
1377
|
+
] = None,
|
|
1389
1378
|
stats: t.Optional[t.Sequence[str]] = None,
|
|
1390
1379
|
terminate_after: t.Optional[int] = None,
|
|
1391
1380
|
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
@@ -1517,7 +1506,7 @@ class Elasticsearch(BaseClient):
|
|
|
1517
1506
|
:param slice: Slice the request manually using the provided slice ID and total
|
|
1518
1507
|
number of slices.
|
|
1519
1508
|
:param slices: The number of slices this task should be divided into.
|
|
1520
|
-
:param sort: A
|
|
1509
|
+
:param sort: A sort object that specifies the order of deleted documents.
|
|
1521
1510
|
:param stats: The specific `tag` of the request for logging and statistical purposes.
|
|
1522
1511
|
:param terminate_after: The maximum number of documents to collect for each shard.
|
|
1523
1512
|
If a query reaches this limit, Elasticsearch terminates the query early.
|
|
@@ -1607,8 +1596,6 @@ class Elasticsearch(BaseClient):
|
|
|
1607
1596
|
__query["search_type"] = search_type
|
|
1608
1597
|
if slices is not None:
|
|
1609
1598
|
__query["slices"] = slices
|
|
1610
|
-
if sort is not None:
|
|
1611
|
-
__query["sort"] = sort
|
|
1612
1599
|
if stats is not None:
|
|
1613
1600
|
__query["stats"] = stats
|
|
1614
1601
|
if terminate_after is not None:
|
|
@@ -1628,6 +1615,8 @@ class Elasticsearch(BaseClient):
|
|
|
1628
1615
|
__body["query"] = query
|
|
1629
1616
|
if slice is not None:
|
|
1630
1617
|
__body["slice"] = slice
|
|
1618
|
+
if sort is not None:
|
|
1619
|
+
__body["sort"] = sort
|
|
1631
1620
|
__headers = {"accept": "application/json", "content-type": "application/json"}
|
|
1632
1621
|
return self.perform_request( # type: ignore[return-value]
|
|
1633
1622
|
"POST",
|
|
@@ -2322,7 +2311,7 @@ class Elasticsearch(BaseClient):
|
|
|
2322
2311
|
:param index: The name of the index that contains the document.
|
|
2323
2312
|
:param id: A unique document identifier.
|
|
2324
2313
|
:param force_synthetic_source: Indicates whether the request forces synthetic
|
|
2325
|
-
`_source`. Use this
|
|
2314
|
+
`_source`. Use this parameter to test if the mapping supports synthetic `_source`
|
|
2326
2315
|
and to get a sense of the worst case performance. Fetches with this parameter
|
|
2327
2316
|
enabled will be slower than enabling synthetic source natively in the index.
|
|
2328
2317
|
:param preference: The node or shard the operation should be performed on. By
|
|
@@ -2353,8 +2342,8 @@ class Elasticsearch(BaseClient):
|
|
|
2353
2342
|
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
2354
2343
|
of a hit. If no fields are specified, no stored fields are included in the
|
|
2355
2344
|
response. If this field is specified, the `_source` parameter defaults to
|
|
2356
|
-
`false`. Only leaf fields can be retrieved with the `
|
|
2357
|
-
Object fields can't be returned
|
|
2345
|
+
`false`. Only leaf fields can be retrieved with the `stored_fields` option.
|
|
2346
|
+
Object fields can't be returned; if specified, the request fails.
|
|
2358
2347
|
:param version: The version number for concurrency control. It must match the
|
|
2359
2348
|
current version of the document for the request to succeed.
|
|
2360
2349
|
:param version_type: The version type.
|
|
@@ -2558,7 +2547,6 @@ class Elasticsearch(BaseClient):
|
|
|
2558
2547
|
source: t.Optional[t.Union[bool, t.Union[str, t.Sequence[str]]]] = None,
|
|
2559
2548
|
source_excludes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
2560
2549
|
source_includes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
2561
|
-
stored_fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
2562
2550
|
version: t.Optional[int] = None,
|
|
2563
2551
|
version_type: t.Optional[
|
|
2564
2552
|
t.Union[str, t.Literal["external", "external_gte", "force", "internal"]]
|
|
@@ -2595,8 +2583,6 @@ class Elasticsearch(BaseClient):
|
|
|
2595
2583
|
the response.
|
|
2596
2584
|
:param source_includes: A comma-separated list of source fields to include in
|
|
2597
2585
|
the response.
|
|
2598
|
-
:param stored_fields: A comma-separated list of stored fields to return as part
|
|
2599
|
-
of a hit.
|
|
2600
2586
|
:param version: The version number for concurrency control. It must match the
|
|
2601
2587
|
current version of the document for the request to succeed.
|
|
2602
2588
|
:param version_type: The version type.
|
|
@@ -2630,8 +2616,6 @@ class Elasticsearch(BaseClient):
|
|
|
2630
2616
|
__query["_source_excludes"] = source_excludes
|
|
2631
2617
|
if source_includes is not None:
|
|
2632
2618
|
__query["_source_includes"] = source_includes
|
|
2633
|
-
if stored_fields is not None:
|
|
2634
|
-
__query["stored_fields"] = stored_fields
|
|
2635
2619
|
if version is not None:
|
|
2636
2620
|
__query["version"] = version
|
|
2637
2621
|
if version_type is not None:
|
|
@@ -2740,6 +2724,7 @@ class Elasticsearch(BaseClient):
|
|
|
2740
2724
|
t.Union[bool, str, t.Literal["false", "true", "wait_for"]]
|
|
2741
2725
|
] = None,
|
|
2742
2726
|
require_alias: t.Optional[bool] = None,
|
|
2727
|
+
require_data_stream: t.Optional[bool] = None,
|
|
2743
2728
|
routing: t.Optional[str] = None,
|
|
2744
2729
|
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
2745
2730
|
version: t.Optional[int] = None,
|
|
@@ -2875,6 +2860,8 @@ class Elasticsearch(BaseClient):
|
|
|
2875
2860
|
this operation visible to search. If `wait_for`, it waits for a refresh to
|
|
2876
2861
|
make this operation visible to search. If `false`, it does nothing with refreshes.
|
|
2877
2862
|
:param require_alias: If `true`, the destination must be an index alias.
|
|
2863
|
+
:param require_data_stream: If `true`, the request's actions must target a data
|
|
2864
|
+
stream (existing or to be created).
|
|
2878
2865
|
:param routing: A custom value that is used to route operations to a specific
|
|
2879
2866
|
shard.
|
|
2880
2867
|
:param timeout: The period the request waits for the following operations: automatic
|
|
@@ -2936,6 +2923,8 @@ class Elasticsearch(BaseClient):
|
|
|
2936
2923
|
__query["refresh"] = refresh
|
|
2937
2924
|
if require_alias is not None:
|
|
2938
2925
|
__query["require_alias"] = require_alias
|
|
2926
|
+
if require_data_stream is not None:
|
|
2927
|
+
__query["require_data_stream"] = require_data_stream
|
|
2939
2928
|
if routing is not None:
|
|
2940
2929
|
__query["routing"] = routing
|
|
2941
2930
|
if timeout is not None:
|
|
@@ -2971,7 +2960,8 @@ class Elasticsearch(BaseClient):
|
|
|
2971
2960
|
.. raw:: html
|
|
2972
2961
|
|
|
2973
2962
|
<p>Get cluster info.
|
|
2974
|
-
Get basic build, version, and cluster information
|
|
2963
|
+
Get basic build, version, and cluster information.
|
|
2964
|
+
NOTE: In Serverless, this API is retained for backward compatibility only. Some response fields, such as the version number, should be ignored.</p>
|
|
2975
2965
|
|
|
2976
2966
|
|
|
2977
2967
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/group/endpoint-info>`_
|
|
@@ -3584,8 +3574,7 @@ class Elasticsearch(BaseClient):
|
|
|
3584
3574
|
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
3585
3575
|
If the request can target data streams, this argument determines whether
|
|
3586
3576
|
wildcard expressions match hidden data streams. It supports comma-separated
|
|
3587
|
-
values, such as `open,hidden`.
|
|
3588
|
-
`hidden`, `none`.
|
|
3577
|
+
values, such as `open,hidden`.
|
|
3589
3578
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
3590
3579
|
a missing or closed index.
|
|
3591
3580
|
:param index_filter: Filter indices if the provided query rewrites to `match_none`
|
|
@@ -3885,110 +3874,14 @@ class Elasticsearch(BaseClient):
|
|
|
3885
3874
|
In this case, the response includes a count of the version conflicts that were encountered.
|
|
3886
3875
|
Note that the handling of other error types is unaffected by the <code>conflicts</code> property.
|
|
3887
3876
|
Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than <code>max_docs</code> until it has successfully indexed <code>max_docs</code> documents into the target or it has gone through every document in the source query.</p>
|
|
3888
|
-
<p>
|
|
3889
|
-
The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior.
|
|
3890
|
-
Instead, make sure that IDs are unique by using a script.</p>
|
|
3891
|
-
<p><strong>Running reindex asynchronously</strong></p>
|
|
3892
|
-
<p>If the request contains <code>wait_for_completion=false</code>, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.
|
|
3893
|
-
Elasticsearch creates a record of this task as a document at <code>_tasks/<task_id></code>.</p>
|
|
3894
|
-
<p><strong>Reindex from multiple sources</strong></p>
|
|
3895
|
-
<p>If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.
|
|
3896
|
-
That way you can resume the process if there are any errors by removing the partially completed source and starting over.
|
|
3897
|
-
It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.</p>
|
|
3898
|
-
<p>For example, you can use a bash script like this:</p>
|
|
3899
|
-
<pre><code>for index in i1 i2 i3 i4 i5; do
|
|
3900
|
-
curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{
|
|
3901
|
-
"source": {
|
|
3902
|
-
"index": "'$index'"
|
|
3903
|
-
},
|
|
3904
|
-
"dest": {
|
|
3905
|
-
"index": "'$index'-reindexed"
|
|
3906
|
-
}
|
|
3907
|
-
}'
|
|
3908
|
-
done
|
|
3909
|
-
</code></pre>
|
|
3910
|
-
<p><strong>Throttling</strong></p>
|
|
3911
|
-
<p>Set <code>requests_per_second</code> to any positive decimal number (<code>1.4</code>, <code>6</code>, <code>1000</code>, for example) to throttle the rate at which reindex issues batches of index operations.
|
|
3912
|
-
Requests are throttled by padding each batch with a wait time.
|
|
3913
|
-
To turn off throttling, set <code>requests_per_second</code> to <code>-1</code>.</p>
|
|
3914
|
-
<p>The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.
|
|
3915
|
-
The padding time is the difference between the batch size divided by the <code>requests_per_second</code> and the time spent writing.
|
|
3916
|
-
By default the batch size is <code>1000</code>, so if <code>requests_per_second</code> is set to <code>500</code>:</p>
|
|
3917
|
-
<pre><code>target_time = 1000 / 500 per second = 2 seconds
|
|
3918
|
-
wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
|
|
3919
|
-
</code></pre>
|
|
3920
|
-
<p>Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set.
|
|
3921
|
-
This is "bursty" instead of "smooth".</p>
|
|
3922
|
-
<p><strong>Slicing</strong></p>
|
|
3923
|
-
<p>Reindex supports sliced scroll to parallelize the reindexing process.
|
|
3924
|
-
This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.</p>
|
|
3925
|
-
<p>NOTE: Reindexing from remote clusters does not support manual or automatic slicing.</p>
|
|
3926
|
-
<p>You can slice a reindex request manually by providing a slice ID and total number of slices to each request.
|
|
3927
|
-
You can also let reindex automatically parallelize by using sliced scroll to slice on <code>_id</code>.
|
|
3928
|
-
The <code>slices</code> parameter specifies the number of slices to use.</p>
|
|
3929
|
-
<p>Adding <code>slices</code> to the reindex request just automates the manual process, creating sub-requests which means it has some quirks:</p>
|
|
3877
|
+
<p>It's recommended to reindex on indices with a green status. Reindexing can fail when a node shuts down or crashes.</p>
|
|
3930
3878
|
<ul>
|
|
3931
|
-
<li>
|
|
3932
|
-
<li>
|
|
3933
|
-
|
|
3934
|
-
|
|
3935
|
-
<li>Canceling the request with <code>slices</code> will cancel each sub-request.</li>
|
|
3936
|
-
<li>Due to the nature of <code>slices</code>, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution.</li>
|
|
3937
|
-
<li>Parameters like <code>requests_per_second</code> and <code>max_docs</code> on a request with <code>slices</code> are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using <code>max_docs</code> with <code>slices</code> might not result in exactly <code>max_docs</code> documents being reindexed.</li>
|
|
3938
|
-
<li>Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time.</li>
|
|
3879
|
+
<li>When requested with <code>wait_for_completion=true</code> (default), the request fails if the node shuts down.</li>
|
|
3880
|
+
<li>When requested with <code>wait_for_completion=false</code>, a task id is returned, for use with the task management APIs. The task may disappear or fail if the node shuts down.
|
|
3881
|
+
When retrying a failed reindex operation, it might be necessary to set <code>conflicts=proceed</code> or to first delete the partial destination index.
|
|
3882
|
+
Additionally, dry runs, checking disk space, and fetching index recovery information can help address the root cause.</li>
|
|
3939
3883
|
</ul>
|
|
3940
|
-
<p>
|
|
3941
|
-
If slicing manually or otherwise tuning automatic slicing, use the following guidelines.</p>
|
|
3942
|
-
<p>Query performance is most efficient when the number of slices is equal to the number of shards in the index.
|
|
3943
|
-
If that number is large (for example, <code>500</code>), choose a lower number as too many slices will hurt performance.
|
|
3944
|
-
Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.</p>
|
|
3945
|
-
<p>Indexing performance scales linearly across available resources with the number of slices.</p>
|
|
3946
|
-
<p>Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources.</p>
|
|
3947
|
-
<p><strong>Modify documents during reindexing</strong></p>
|
|
3948
|
-
<p>Like <code>_update_by_query</code>, reindex operations support a script that modifies the document.
|
|
3949
|
-
Unlike <code>_update_by_query</code>, the script is allowed to modify the document's metadata.</p>
|
|
3950
|
-
<p>Just as in <code>_update_by_query</code>, you can set <code>ctx.op</code> to change the operation that is run on the destination.
|
|
3951
|
-
For example, set <code>ctx.op</code> to <code>noop</code> if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the <code>noop</code> counter in the response body.
|
|
3952
|
-
Set <code>ctx.op</code> to <code>delete</code> if your script decides that the document must be deleted from the destination.
|
|
3953
|
-
The deletion will be reported in the <code>deleted</code> counter in the response body.
|
|
3954
|
-
Setting <code>ctx.op</code> to anything else will return an error, as will setting any other field in <code>ctx</code>.</p>
|
|
3955
|
-
<p>Think of the possibilities! Just be careful; you are able to change:</p>
|
|
3956
|
-
<ul>
|
|
3957
|
-
<li><code>_id</code></li>
|
|
3958
|
-
<li><code>_index</code></li>
|
|
3959
|
-
<li><code>_version</code></li>
|
|
3960
|
-
<li><code>_routing</code></li>
|
|
3961
|
-
</ul>
|
|
3962
|
-
<p>Setting <code>_version</code> to <code>null</code> or clearing it from the <code>ctx</code> map is just like not sending the version in an indexing request.
|
|
3963
|
-
It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.</p>
|
|
3964
|
-
<p><strong>Reindex from remote</strong></p>
|
|
3965
|
-
<p>Reindex supports reindexing from a remote Elasticsearch cluster.
|
|
3966
|
-
The <code>host</code> parameter must contain a scheme, host, port, and optional path.
|
|
3967
|
-
The <code>username</code> and <code>password</code> parameters are optional and when they are present the reindex operation will connect to the remote Elasticsearch node using basic authentication.
|
|
3968
|
-
Be sure to use HTTPS when using basic authentication or the password will be sent in plain text.
|
|
3969
|
-
There are a range of settings available to configure the behavior of the HTTPS connection.</p>
|
|
3970
|
-
<p>When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.
|
|
3971
|
-
Remote hosts must be explicitly allowed with the <code>reindex.remote.whitelist</code> setting.
|
|
3972
|
-
It can be set to a comma delimited list of allowed remote host and port combinations.
|
|
3973
|
-
Scheme is ignored; only the host and port are used.
|
|
3974
|
-
For example:</p>
|
|
3975
|
-
<pre><code>reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"]
|
|
3976
|
-
</code></pre>
|
|
3977
|
-
<p>The list of allowed hosts must be configured on any nodes that will coordinate the reindex.
|
|
3978
|
-
This feature should work with remote clusters of any version of Elasticsearch.
|
|
3979
|
-
This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.</p>
|
|
3980
|
-
<p>WARNING: Elasticsearch does not support forward compatibility across major versions.
|
|
3981
|
-
For example, you cannot reindex from a 7.x cluster into a 6.x cluster.</p>
|
|
3982
|
-
<p>To enable queries sent to older versions of Elasticsearch, the <code>query</code> parameter is sent directly to the remote host without validation or modification.</p>
|
|
3983
|
-
<p>NOTE: Reindexing from remote clusters does not support manual or automatic slicing.</p>
|
|
3984
|
-
<p>Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb.
|
|
3985
|
-
If the remote index includes very large documents you'll need to use a smaller batch size.
|
|
3986
|
-
It is also possible to set the socket read timeout on the remote connection with the <code>socket_timeout</code> field and the connection timeout with the <code>connect_timeout</code> field.
|
|
3987
|
-
Both default to 30 seconds.</p>
|
|
3988
|
-
<p><strong>Configuring SSL parameters</strong></p>
|
|
3989
|
-
<p>Reindex from remote supports configurable SSL settings.
|
|
3990
|
-
These must be specified in the <code>elasticsearch.yml</code> file, with the exception of the secure settings, which you add in the Elasticsearch keystore.
|
|
3991
|
-
It is not possible to configure SSL in the body of the reindex request.</p>
|
|
3884
|
+
<p>Refer to the linked documentation for examples of how to reindex documents.</p>
|
|
3992
3885
|
|
|
3993
3886
|
|
|
3994
3887
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-reindex>`_
|
|
@@ -4990,51 +4883,6 @@ class Elasticsearch(BaseClient):
|
|
|
4990
4883
|
<li>Optionally, a <code>geo_bounds</code> aggregation on the <code><field></code>. The search only includes this aggregation if the <code>exact_bounds</code> parameter is <code>true</code>.</li>
|
|
4991
4884
|
<li>If the optional parameter <code>with_labels</code> is <code>true</code>, the internal search will include a dynamic runtime field that calls the <code>getLabelPosition</code> function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.</li>
|
|
4992
4885
|
</ul>
|
|
4993
|
-
<p>For example, Elasticsearch may translate a vector tile search API request with a <code>grid_agg</code> argument of <code>geotile</code> and an <code>exact_bounds</code> argument of <code>true</code> into the following search</p>
|
|
4994
|
-
<pre><code>GET my-index/_search
|
|
4995
|
-
{
|
|
4996
|
-
"size": 10000,
|
|
4997
|
-
"query": {
|
|
4998
|
-
"geo_bounding_box": {
|
|
4999
|
-
"my-geo-field": {
|
|
5000
|
-
"top_left": {
|
|
5001
|
-
"lat": -40.979898069620134,
|
|
5002
|
-
"lon": -45
|
|
5003
|
-
},
|
|
5004
|
-
"bottom_right": {
|
|
5005
|
-
"lat": -66.51326044311186,
|
|
5006
|
-
"lon": 0
|
|
5007
|
-
}
|
|
5008
|
-
}
|
|
5009
|
-
}
|
|
5010
|
-
},
|
|
5011
|
-
"aggregations": {
|
|
5012
|
-
"grid": {
|
|
5013
|
-
"geotile_grid": {
|
|
5014
|
-
"field": "my-geo-field",
|
|
5015
|
-
"precision": 11,
|
|
5016
|
-
"size": 65536,
|
|
5017
|
-
"bounds": {
|
|
5018
|
-
"top_left": {
|
|
5019
|
-
"lat": -40.979898069620134,
|
|
5020
|
-
"lon": -45
|
|
5021
|
-
},
|
|
5022
|
-
"bottom_right": {
|
|
5023
|
-
"lat": -66.51326044311186,
|
|
5024
|
-
"lon": 0
|
|
5025
|
-
}
|
|
5026
|
-
}
|
|
5027
|
-
}
|
|
5028
|
-
},
|
|
5029
|
-
"bounds": {
|
|
5030
|
-
"geo_bounds": {
|
|
5031
|
-
"field": "my-geo-field",
|
|
5032
|
-
"wrap_longitude": false
|
|
5033
|
-
}
|
|
5034
|
-
}
|
|
5035
|
-
}
|
|
5036
|
-
}
|
|
5037
|
-
</code></pre>
|
|
5038
4886
|
<p>The API returns results as a binary Mapbox vector tile.
|
|
5039
4887
|
Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:</p>
|
|
5040
4888
|
<ul>
|
|
@@ -5289,6 +5137,7 @@ class Elasticsearch(BaseClient):
|
|
|
5289
5137
|
Some cells may intersect more than one vector tile.
|
|
5290
5138
|
To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level.
|
|
5291
5139
|
Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.</p>
|
|
5140
|
+
<p>Learn how to use the vector tile search API with practical examples in the <a href="https://www.elastic.co/docs/reference/elasticsearch/rest-apis/vector-tile-search">Vector tile search examples</a> guide.</p>
|
|
5292
5141
|
|
|
5293
5142
|
|
|
5294
5143
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-search-mvt>`_
|
|
@@ -5478,7 +5327,7 @@ class Elasticsearch(BaseClient):
|
|
|
5478
5327
|
:param expand_wildcards: Type of index that wildcard patterns can match. If the
|
|
5479
5328
|
request can target data streams, this argument determines whether wildcard
|
|
5480
5329
|
expressions match hidden data streams. Supports comma-separated values, such
|
|
5481
|
-
as `open,hidden`.
|
|
5330
|
+
as `open,hidden`.
|
|
5482
5331
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
5483
5332
|
a missing or closed index.
|
|
5484
5333
|
:param local: If `true`, the request retrieves information from the local node
|
|
@@ -5590,8 +5439,7 @@ class Elasticsearch(BaseClient):
|
|
|
5590
5439
|
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
5591
5440
|
If the request can target data streams, this argument determines whether
|
|
5592
5441
|
wildcard expressions match hidden data streams. Supports comma-separated
|
|
5593
|
-
values, such as `open,hidden`.
|
|
5594
|
-
`hidden`, `none`.
|
|
5442
|
+
values, such as `open,hidden`.
|
|
5595
5443
|
:param explain: If `true`, returns detailed information about score calculation
|
|
5596
5444
|
as part of each hit. If you specify both this and the `explain` query parameter,
|
|
5597
5445
|
the API uses only the query parameter.
|
|
@@ -5812,7 +5660,7 @@ class Elasticsearch(BaseClient):
|
|
|
5812
5660
|
doc: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
5813
5661
|
error_trace: t.Optional[bool] = None,
|
|
5814
5662
|
field_statistics: t.Optional[bool] = None,
|
|
5815
|
-
fields: t.Optional[t.
|
|
5663
|
+
fields: t.Optional[t.Sequence[str]] = None,
|
|
5816
5664
|
filter: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
5817
5665
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
5818
5666
|
human: t.Optional[bool] = None,
|
|
@@ -5865,7 +5713,8 @@ class Elasticsearch(BaseClient):
|
|
|
5865
5713
|
The information is only retrieved for the shard the requested document resides in.
|
|
5866
5714
|
The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context.
|
|
5867
5715
|
By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected.
|
|
5868
|
-
Use <code>routing</code> only to hit a particular shard
|
|
5716
|
+
Use <code>routing</code> only to hit a particular shard.
|
|
5717
|
+
Refer to the linked documentation for detailed examples of how to use this API.</p>
|
|
5869
5718
|
|
|
5870
5719
|
|
|
5871
5720
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-termvectors>`_
|
|
@@ -6036,7 +5885,8 @@ class Elasticsearch(BaseClient):
|
|
|
6036
5885
|
</ul>
|
|
6037
5886
|
<p>The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation.</p>
|
|
6038
5887
|
<p>The <code>_source</code> field must be enabled to use this API.
|
|
6039
|
-
In addition to <code>_source</code>, you can access the following variables through the <code>ctx</code> map: <code>_index</code>, <code>_type</code>, <code>_id</code>, <code>_version</code>, <code>_routing</code>, and <code>_now</code> (the current timestamp)
|
|
5888
|
+
In addition to <code>_source</code>, you can access the following variables through the <code>ctx</code> map: <code>_index</code>, <code>_type</code>, <code>_id</code>, <code>_version</code>, <code>_routing</code>, and <code>_now</code> (the current timestamp).
|
|
5889
|
+
For usage examples such as partial updates, upserts, and scripted updates, see the external documentation.</p>
|
|
6040
5890
|
|
|
6041
5891
|
|
|
6042
5892
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-update>`_
|
|
@@ -6229,6 +6079,24 @@ class Elasticsearch(BaseClient):
|
|
|
6229
6079
|
A bulk update request is performed for each batch of matching documents.
|
|
6230
6080
|
Any query or update failures cause the update by query request to fail and the failures are shown in the response.
|
|
6231
6081
|
Any update requests that completed successfully still stick; they are not rolled back.</p>
|
|
6082
|
+
<p><strong>Refreshing shards</strong></p>
|
|
6083
|
+
<p>Specifying the <code>refresh</code> parameter refreshes all shards once the request completes.
|
|
6084
|
+
This is different to the update API's <code>refresh</code> parameter, which causes only the shard
|
|
6085
|
+
that received the request to be refreshed. Unlike the update API, it does not support
|
|
6086
|
+
<code>wait_for</code>.</p>
|
|
6087
|
+
<p><strong>Running update by query asynchronously</strong></p>
|
|
6088
|
+
<p>If the request contains <code>wait_for_completion=false</code>, Elasticsearch
|
|
6089
|
+
performs some preflight checks, launches the request, and returns a
|
|
6090
|
+
<a href="https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-tasks">task</a> you can use to cancel or get the status of the task.
|
|
6091
|
+
Elasticsearch creates a record of this task as a document at <code>.tasks/task/${taskId}</code>.</p>
|
|
6092
|
+
<p><strong>Waiting for active shards</strong></p>
|
|
6093
|
+
<p><code>wait_for_active_shards</code> controls how many copies of a shard must be active
|
|
6094
|
+
before proceeding with the request. See <a href="https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-create#operation-create-wait_for_active_shards"><code>wait_for_active_shards</code></a>
|
|
6095
|
+
for details. <code>timeout</code> controls how long each write request waits for unavailable
|
|
6096
|
+
shards to become available. Both work exactly the way they work in the
|
|
6097
|
+
<a href="https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-bulk">Bulk API</a>. Update by query uses scrolled searches, so you can also
|
|
6098
|
+
specify the <code>scroll</code> parameter to control how long it keeps the search context
|
|
6099
|
+
alive, for example <code>?scroll=10m</code>. The default is 5 minutes.</p>
|
|
6232
6100
|
<p><strong>Throttling update requests</strong></p>
|
|
6233
6101
|
<p>To control the rate at which update by query issues batches of update operations, you can set <code>requests_per_second</code> to any positive decimal number.
|
|
6234
6102
|
This pads each batch with a wait time to throttle the rate.
|
|
@@ -6263,18 +6131,8 @@ class Elasticsearch(BaseClient):
|
|
|
6263
6131
|
<li>Query performance is most efficient when the number of slices is equal to the number of shards in the index or backing index. If that number is large (for example, 500), choose a lower number as too many slices hurt performance. Setting slices higher than the number of shards generally does not improve efficiency and adds overhead.</li>
|
|
6264
6132
|
<li>Update performance scales linearly across available resources with the number of slices.</li>
|
|
6265
6133
|
</ul>
|
|
6266
|
-
<p>Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources
|
|
6267
|
-
|
|
6268
|
-
<p>Update by query supports scripts to update the document source.
|
|
6269
|
-
As with the update API, you can set <code>ctx.op</code> to change the operation that is performed.</p>
|
|
6270
|
-
<p>Set <code>ctx.op = "noop"</code> if your script decides that it doesn't have to make any changes.
|
|
6271
|
-
The update by query operation skips updating the document and increments the <code>noop</code> counter.</p>
|
|
6272
|
-
<p>Set <code>ctx.op = "delete"</code> if your script decides that the document should be deleted.
|
|
6273
|
-
The update by query operation deletes the document and increments the <code>deleted</code> counter.</p>
|
|
6274
|
-
<p>Update by query supports only <code>index</code>, <code>noop</code>, and <code>delete</code>.
|
|
6275
|
-
Setting <code>ctx.op</code> to anything else is an error.
|
|
6276
|
-
Setting any other field in <code>ctx</code> is an error.
|
|
6277
|
-
This API enables you to only modify the source of matching documents; you cannot move them.</p>
|
|
6134
|
+
<p>Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.
|
|
6135
|
+
Refer to the linked documentation for examples of how to update documents using the <code>_update_by_query</code> API.</p>
|
|
6278
6136
|
|
|
6279
6137
|
|
|
6280
6138
|
`<https://www.elastic.co/docs/api/doc/elasticsearch/v9/operation/operation-update-by-query>`_
|
|
@@ -6302,8 +6160,7 @@ class Elasticsearch(BaseClient):
|
|
|
6302
6160
|
:param expand_wildcards: The type of index that wildcard patterns can match.
|
|
6303
6161
|
If the request can target data streams, this argument determines whether
|
|
6304
6162
|
wildcard expressions match hidden data streams. It supports comma-separated
|
|
6305
|
-
values, such as `open,hidden`.
|
|
6306
|
-
`hidden`, `none`.
|
|
6163
|
+
values, such as `open,hidden`.
|
|
6307
6164
|
:param from_: Skips the specified number of documents.
|
|
6308
6165
|
:param ignore_unavailable: If `false`, the request returns an error if it targets
|
|
6309
6166
|
a missing or closed index.
|