elasticsearch9 9.1.1__py3-none-any.whl → 9.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch9/_async/client/__init__.py +69 -65
- elasticsearch9/_async/client/async_search.py +3 -3
- elasticsearch9/_async/client/autoscaling.py +8 -4
- elasticsearch9/_async/client/cat.py +521 -27
- elasticsearch9/_async/client/ccr.py +10 -10
- elasticsearch9/_async/client/cluster.py +34 -33
- elasticsearch9/_async/client/connector.py +45 -44
- elasticsearch9/_async/client/dangling_indices.py +8 -12
- elasticsearch9/_async/client/enrich.py +10 -10
- elasticsearch9/_async/client/eql.py +10 -10
- elasticsearch9/_async/client/esql.py +16 -16
- elasticsearch9/_async/client/features.py +6 -6
- elasticsearch9/_async/client/fleet.py +8 -12
- elasticsearch9/_async/client/graph.py +3 -7
- elasticsearch9/_async/client/ilm.py +20 -28
- elasticsearch9/_async/client/indices.py +163 -169
- elasticsearch9/_async/client/inference.py +41 -127
- elasticsearch9/_async/client/ingest.py +9 -9
- elasticsearch9/_async/client/license.py +5 -7
- elasticsearch9/_async/client/logstash.py +7 -5
- elasticsearch9/_async/client/migration.py +6 -6
- elasticsearch9/_async/client/ml.py +125 -85
- elasticsearch9/_async/client/monitoring.py +4 -3
- elasticsearch9/_async/client/nodes.py +17 -17
- elasticsearch9/_async/client/query_rules.py +16 -16
- elasticsearch9/_async/client/rollup.py +21 -21
- elasticsearch9/_async/client/search_application.py +19 -19
- elasticsearch9/_async/client/searchable_snapshots.py +10 -10
- elasticsearch9/_async/client/security.py +8 -7
- elasticsearch9/_async/client/shutdown.py +14 -19
- elasticsearch9/_async/client/simulate.py +4 -4
- elasticsearch9/_async/client/slm.py +18 -22
- elasticsearch9/_async/client/snapshot.py +20 -20
- elasticsearch9/_async/client/sql.py +10 -10
- elasticsearch9/_async/client/streams.py +186 -0
- elasticsearch9/_async/client/synonyms.py +10 -10
- elasticsearch9/_async/client/tasks.py +8 -8
- elasticsearch9/_async/client/text_structure.py +13 -9
- elasticsearch9/_async/client/transform.py +51 -12
- elasticsearch9/_async/client/utils.py +4 -2
- elasticsearch9/_async/client/watcher.py +27 -31
- elasticsearch9/_async/client/xpack.py +6 -5
- elasticsearch9/_async/helpers.py +58 -9
- elasticsearch9/_sync/client/__init__.py +71 -65
- elasticsearch9/_sync/client/async_search.py +3 -3
- elasticsearch9/_sync/client/autoscaling.py +8 -4
- elasticsearch9/_sync/client/cat.py +521 -27
- elasticsearch9/_sync/client/ccr.py +10 -10
- elasticsearch9/_sync/client/cluster.py +34 -33
- elasticsearch9/_sync/client/connector.py +45 -44
- elasticsearch9/_sync/client/dangling_indices.py +8 -12
- elasticsearch9/_sync/client/enrich.py +10 -10
- elasticsearch9/_sync/client/eql.py +10 -10
- elasticsearch9/_sync/client/esql.py +16 -16
- elasticsearch9/_sync/client/features.py +6 -6
- elasticsearch9/_sync/client/fleet.py +8 -12
- elasticsearch9/_sync/client/graph.py +3 -7
- elasticsearch9/_sync/client/ilm.py +20 -28
- elasticsearch9/_sync/client/indices.py +163 -169
- elasticsearch9/_sync/client/inference.py +41 -127
- elasticsearch9/_sync/client/ingest.py +9 -9
- elasticsearch9/_sync/client/license.py +5 -7
- elasticsearch9/_sync/client/logstash.py +7 -5
- elasticsearch9/_sync/client/migration.py +6 -6
- elasticsearch9/_sync/client/ml.py +125 -85
- elasticsearch9/_sync/client/monitoring.py +4 -3
- elasticsearch9/_sync/client/nodes.py +17 -17
- elasticsearch9/_sync/client/query_rules.py +16 -16
- elasticsearch9/_sync/client/rollup.py +21 -21
- elasticsearch9/_sync/client/search_application.py +19 -19
- elasticsearch9/_sync/client/searchable_snapshots.py +10 -10
- elasticsearch9/_sync/client/security.py +8 -7
- elasticsearch9/_sync/client/shutdown.py +14 -19
- elasticsearch9/_sync/client/simulate.py +4 -4
- elasticsearch9/_sync/client/slm.py +18 -22
- elasticsearch9/_sync/client/snapshot.py +20 -20
- elasticsearch9/_sync/client/sql.py +10 -10
- elasticsearch9/_sync/client/streams.py +186 -0
- elasticsearch9/_sync/client/synonyms.py +10 -10
- elasticsearch9/_sync/client/tasks.py +8 -8
- elasticsearch9/_sync/client/text_structure.py +13 -9
- elasticsearch9/_sync/client/transform.py +51 -12
- elasticsearch9/_sync/client/utils.py +16 -2
- elasticsearch9/_sync/client/watcher.py +27 -31
- elasticsearch9/_sync/client/xpack.py +6 -5
- elasticsearch9/_version.py +2 -1
- elasticsearch9/client.py +2 -0
- elasticsearch9/compat.py +43 -1
- elasticsearch9/dsl/__init__.py +28 -0
- elasticsearch9/dsl/_async/document.py +4 -5
- elasticsearch9/dsl/_async/index.py +1 -1
- elasticsearch9/dsl/_async/search.py +2 -3
- elasticsearch9/dsl/_sync/document.py +4 -5
- elasticsearch9/dsl/_sync/index.py +1 -1
- elasticsearch9/dsl/_sync/search.py +2 -3
- elasticsearch9/dsl/aggs.py +100 -3
- elasticsearch9/dsl/async_connections.py +1 -2
- elasticsearch9/dsl/connections.py +1 -2
- elasticsearch9/dsl/document_base.py +15 -0
- elasticsearch9/dsl/field.py +12 -1
- elasticsearch9/dsl/query.py +23 -0
- elasticsearch9/dsl/response/__init__.py +3 -0
- elasticsearch9/dsl/serializer.py +1 -2
- elasticsearch9/dsl/types.py +185 -5
- elasticsearch9/dsl/utils.py +1 -2
- elasticsearch9/esql/esql.py +1 -1
- elasticsearch9/esql/functions.py +2 -2
- elasticsearch9/helpers/__init__.py +10 -1
- elasticsearch9/helpers/actions.py +106 -33
- elasticsearch9/helpers/vectorstore/__init__.py +7 -7
- elasticsearch9/helpers/vectorstore/_async/_utils.py +1 -1
- elasticsearch9/helpers/vectorstore/_async/embedding_service.py +2 -2
- elasticsearch9/helpers/vectorstore/_async/strategies.py +3 -3
- elasticsearch9/helpers/vectorstore/_async/vectorstore.py +5 -5
- elasticsearch9/helpers/vectorstore/_sync/_utils.py +1 -1
- elasticsearch9/helpers/vectorstore/_sync/embedding_service.py +2 -2
- elasticsearch9/helpers/vectorstore/_sync/strategies.py +3 -3
- elasticsearch9/helpers/vectorstore/_sync/vectorstore.py +5 -5
- {elasticsearch9-9.1.1.dist-info → elasticsearch9-9.1.3.dist-info}/METADATA +2 -2
- elasticsearch9-9.1.3.dist-info/RECORD +165 -0
- {elasticsearch9-9.1.1.dist-info → elasticsearch9-9.1.3.dist-info}/WHEEL +1 -1
- elasticsearch9-9.1.1.dist-info/RECORD +0 -163
- {elasticsearch9-9.1.1.dist-info → elasticsearch9-9.1.3.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch9-9.1.1.dist-info → elasticsearch9-9.1.3.dist-info}/licenses/NOTICE +0 -0
elasticsearch9/dsl/types.py
CHANGED
|
@@ -938,6 +938,7 @@ class GeoDistanceSort(AttrDict[Any]):
|
|
|
938
938
|
Dict[str, Any],
|
|
939
939
|
"DefaultType",
|
|
940
940
|
] = DEFAULT,
|
|
941
|
+
/,
|
|
941
942
|
*,
|
|
942
943
|
mode: Union[
|
|
943
944
|
Literal["min", "max", "sum", "avg", "median"], DefaultType
|
|
@@ -2774,6 +2775,31 @@ class NumericFielddata(AttrDict[Any]):
|
|
|
2774
2775
|
super().__init__(kwargs)
|
|
2775
2776
|
|
|
2776
2777
|
|
|
2778
|
+
class PValueHeuristic(AttrDict[Any]):
|
|
2779
|
+
"""
|
|
2780
|
+
:arg background_is_superset:
|
|
2781
|
+
:arg normalize_above: Should the results be normalized when above the
|
|
2782
|
+
given value. Allows for consistent significance results at various
|
|
2783
|
+
scales. Note: `0` is a special value which means no normalization
|
|
2784
|
+
"""
|
|
2785
|
+
|
|
2786
|
+
background_is_superset: Union[bool, DefaultType]
|
|
2787
|
+
normalize_above: Union[int, DefaultType]
|
|
2788
|
+
|
|
2789
|
+
def __init__(
|
|
2790
|
+
self,
|
|
2791
|
+
*,
|
|
2792
|
+
background_is_superset: Union[bool, DefaultType] = DEFAULT,
|
|
2793
|
+
normalize_above: Union[int, DefaultType] = DEFAULT,
|
|
2794
|
+
**kwargs: Any,
|
|
2795
|
+
):
|
|
2796
|
+
if background_is_superset is not DEFAULT:
|
|
2797
|
+
kwargs["background_is_superset"] = background_is_superset
|
|
2798
|
+
if normalize_above is not DEFAULT:
|
|
2799
|
+
kwargs["normalize_above"] = normalize_above
|
|
2800
|
+
super().__init__(kwargs)
|
|
2801
|
+
|
|
2802
|
+
|
|
2777
2803
|
class PercentageScoreHeuristic(AttrDict[Any]):
|
|
2778
2804
|
pass
|
|
2779
2805
|
|
|
@@ -3219,6 +3245,7 @@ class SortOptions(AttrDict[Any]):
|
|
|
3219
3245
|
self,
|
|
3220
3246
|
_field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
|
|
3221
3247
|
_value: Union["FieldSort", Dict[str, Any], "DefaultType"] = DEFAULT,
|
|
3248
|
+
/,
|
|
3222
3249
|
*,
|
|
3223
3250
|
_score: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT,
|
|
3224
3251
|
_doc: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT,
|
|
@@ -4009,24 +4036,25 @@ class TestPopulation(AttrDict[Any]):
|
|
|
4009
4036
|
|
|
4010
4037
|
class TextEmbedding(AttrDict[Any]):
|
|
4011
4038
|
"""
|
|
4012
|
-
:arg model_id: (required)
|
|
4013
4039
|
:arg model_text: (required)
|
|
4040
|
+
:arg model_id: Model ID is required for all dense_vector fields but
|
|
4041
|
+
may be inferred for semantic_text fields
|
|
4014
4042
|
"""
|
|
4015
4043
|
|
|
4016
|
-
model_id: Union[str, DefaultType]
|
|
4017
4044
|
model_text: Union[str, DefaultType]
|
|
4045
|
+
model_id: Union[str, DefaultType]
|
|
4018
4046
|
|
|
4019
4047
|
def __init__(
|
|
4020
4048
|
self,
|
|
4021
4049
|
*,
|
|
4022
|
-
model_id: Union[str, DefaultType] = DEFAULT,
|
|
4023
4050
|
model_text: Union[str, DefaultType] = DEFAULT,
|
|
4051
|
+
model_id: Union[str, DefaultType] = DEFAULT,
|
|
4024
4052
|
**kwargs: Any,
|
|
4025
4053
|
):
|
|
4026
|
-
if model_id is not DEFAULT:
|
|
4027
|
-
kwargs["model_id"] = model_id
|
|
4028
4054
|
if model_text is not DEFAULT:
|
|
4029
4055
|
kwargs["model_text"] = model_text
|
|
4056
|
+
if model_id is not DEFAULT:
|
|
4057
|
+
kwargs["model_id"] = model_id
|
|
4030
4058
|
super().__init__(kwargs)
|
|
4031
4059
|
|
|
4032
4060
|
|
|
@@ -4659,6 +4687,82 @@ class CardinalityAggregate(AttrDict[Any]):
|
|
|
4659
4687
|
meta: Mapping[str, Any]
|
|
4660
4688
|
|
|
4661
4689
|
|
|
4690
|
+
class CartesianBoundsAggregate(AttrDict[Any]):
|
|
4691
|
+
"""
|
|
4692
|
+
:arg bounds:
|
|
4693
|
+
:arg meta:
|
|
4694
|
+
"""
|
|
4695
|
+
|
|
4696
|
+
bounds: "TopLeftBottomRightGeoBounds"
|
|
4697
|
+
meta: Mapping[str, Any]
|
|
4698
|
+
|
|
4699
|
+
|
|
4700
|
+
class CartesianCentroidAggregate(AttrDict[Any]):
|
|
4701
|
+
"""
|
|
4702
|
+
:arg count: (required)
|
|
4703
|
+
:arg location:
|
|
4704
|
+
:arg meta:
|
|
4705
|
+
"""
|
|
4706
|
+
|
|
4707
|
+
count: int
|
|
4708
|
+
location: "CartesianPoint"
|
|
4709
|
+
meta: Mapping[str, Any]
|
|
4710
|
+
|
|
4711
|
+
|
|
4712
|
+
class CartesianPoint(AttrDict[Any]):
|
|
4713
|
+
"""
|
|
4714
|
+
:arg x: (required)
|
|
4715
|
+
:arg y: (required)
|
|
4716
|
+
"""
|
|
4717
|
+
|
|
4718
|
+
x: float
|
|
4719
|
+
y: float
|
|
4720
|
+
|
|
4721
|
+
|
|
4722
|
+
class ChangePointAggregate(AttrDict[Any]):
|
|
4723
|
+
"""
|
|
4724
|
+
:arg type: (required)
|
|
4725
|
+
:arg bucket:
|
|
4726
|
+
:arg meta:
|
|
4727
|
+
"""
|
|
4728
|
+
|
|
4729
|
+
type: "ChangeType"
|
|
4730
|
+
bucket: "ChangePointBucket"
|
|
4731
|
+
meta: Mapping[str, Any]
|
|
4732
|
+
|
|
4733
|
+
|
|
4734
|
+
class ChangePointBucket(AttrDict[Any]):
|
|
4735
|
+
"""
|
|
4736
|
+
:arg key: (required)
|
|
4737
|
+
:arg doc_count: (required)
|
|
4738
|
+
"""
|
|
4739
|
+
|
|
4740
|
+
key: Union[int, float, str, bool, None]
|
|
4741
|
+
doc_count: int
|
|
4742
|
+
|
|
4743
|
+
|
|
4744
|
+
class ChangeType(AttrDict[Any]):
|
|
4745
|
+
"""
|
|
4746
|
+
:arg dip:
|
|
4747
|
+
:arg distribution_change:
|
|
4748
|
+
:arg indeterminable:
|
|
4749
|
+
:arg non_stationary:
|
|
4750
|
+
:arg spike:
|
|
4751
|
+
:arg stationary:
|
|
4752
|
+
:arg step_change:
|
|
4753
|
+
:arg trend_change:
|
|
4754
|
+
"""
|
|
4755
|
+
|
|
4756
|
+
dip: "Dip"
|
|
4757
|
+
distribution_change: "DistributionChange"
|
|
4758
|
+
indeterminable: "Indeterminable"
|
|
4759
|
+
non_stationary: "NonStationary"
|
|
4760
|
+
spike: "Spike"
|
|
4761
|
+
stationary: "Stationary"
|
|
4762
|
+
step_change: "StepChange"
|
|
4763
|
+
trend_change: "TrendChange"
|
|
4764
|
+
|
|
4765
|
+
|
|
4662
4766
|
class ChildrenAggregate(AttrDict[Any]):
|
|
4663
4767
|
"""
|
|
4664
4768
|
:arg doc_count: (required)
|
|
@@ -4936,6 +5040,26 @@ class DfsStatisticsProfile(AttrDict[Any]):
|
|
|
4936
5040
|
children: Sequence["DfsStatisticsProfile"]
|
|
4937
5041
|
|
|
4938
5042
|
|
|
5043
|
+
class Dip(AttrDict[Any]):
|
|
5044
|
+
"""
|
|
5045
|
+
:arg p_value: (required)
|
|
5046
|
+
:arg change_point: (required)
|
|
5047
|
+
"""
|
|
5048
|
+
|
|
5049
|
+
p_value: float
|
|
5050
|
+
change_point: int
|
|
5051
|
+
|
|
5052
|
+
|
|
5053
|
+
class DistributionChange(AttrDict[Any]):
|
|
5054
|
+
"""
|
|
5055
|
+
:arg p_value: (required)
|
|
5056
|
+
:arg change_point: (required)
|
|
5057
|
+
"""
|
|
5058
|
+
|
|
5059
|
+
p_value: float
|
|
5060
|
+
change_point: int
|
|
5061
|
+
|
|
5062
|
+
|
|
4939
5063
|
class DoubleTermsAggregate(AttrDict[Any]):
|
|
4940
5064
|
"""
|
|
4941
5065
|
Result of a `terms` aggregation when the field is some kind of decimal
|
|
@@ -5497,6 +5621,14 @@ class HitsMetadata(AttrDict[Any]):
|
|
|
5497
5621
|
max_score: Union[float, None]
|
|
5498
5622
|
|
|
5499
5623
|
|
|
5624
|
+
class Indeterminable(AttrDict[Any]):
|
|
5625
|
+
"""
|
|
5626
|
+
:arg reason: (required)
|
|
5627
|
+
"""
|
|
5628
|
+
|
|
5629
|
+
reason: str
|
|
5630
|
+
|
|
5631
|
+
|
|
5500
5632
|
class InferenceAggregate(AttrDict[Any]):
|
|
5501
5633
|
"""
|
|
5502
5634
|
:arg value:
|
|
@@ -5899,6 +6031,18 @@ class NestedIdentity(AttrDict[Any]):
|
|
|
5899
6031
|
_nested: "NestedIdentity"
|
|
5900
6032
|
|
|
5901
6033
|
|
|
6034
|
+
class NonStationary(AttrDict[Any]):
|
|
6035
|
+
"""
|
|
6036
|
+
:arg p_value: (required)
|
|
6037
|
+
:arg r_value: (required)
|
|
6038
|
+
:arg trend: (required)
|
|
6039
|
+
"""
|
|
6040
|
+
|
|
6041
|
+
p_value: float
|
|
6042
|
+
r_value: float
|
|
6043
|
+
trend: str
|
|
6044
|
+
|
|
6045
|
+
|
|
5902
6046
|
class ParentAggregate(AttrDict[Any]):
|
|
5903
6047
|
"""
|
|
5904
6048
|
:arg doc_count: (required)
|
|
@@ -6256,6 +6400,16 @@ class SimpleValueAggregate(AttrDict[Any]):
|
|
|
6256
6400
|
meta: Mapping[str, Any]
|
|
6257
6401
|
|
|
6258
6402
|
|
|
6403
|
+
class Spike(AttrDict[Any]):
|
|
6404
|
+
"""
|
|
6405
|
+
:arg p_value: (required)
|
|
6406
|
+
:arg change_point: (required)
|
|
6407
|
+
"""
|
|
6408
|
+
|
|
6409
|
+
p_value: float
|
|
6410
|
+
change_point: int
|
|
6411
|
+
|
|
6412
|
+
|
|
6259
6413
|
class StandardDeviationBounds(AttrDict[Any]):
|
|
6260
6414
|
"""
|
|
6261
6415
|
:arg upper: (required)
|
|
@@ -6292,6 +6446,10 @@ class StandardDeviationBoundsAsString(AttrDict[Any]):
|
|
|
6292
6446
|
lower_sampling: str
|
|
6293
6447
|
|
|
6294
6448
|
|
|
6449
|
+
class Stationary(AttrDict[Any]):
|
|
6450
|
+
pass
|
|
6451
|
+
|
|
6452
|
+
|
|
6295
6453
|
class StatsAggregate(AttrDict[Any]):
|
|
6296
6454
|
"""
|
|
6297
6455
|
Statistics aggregation result. `min`, `max` and `avg` are missing if
|
|
@@ -6347,6 +6505,16 @@ class StatsBucketAggregate(AttrDict[Any]):
|
|
|
6347
6505
|
meta: Mapping[str, Any]
|
|
6348
6506
|
|
|
6349
6507
|
|
|
6508
|
+
class StepChange(AttrDict[Any]):
|
|
6509
|
+
"""
|
|
6510
|
+
:arg p_value: (required)
|
|
6511
|
+
:arg change_point: (required)
|
|
6512
|
+
"""
|
|
6513
|
+
|
|
6514
|
+
p_value: float
|
|
6515
|
+
change_point: int
|
|
6516
|
+
|
|
6517
|
+
|
|
6350
6518
|
class StringRareTermsAggregate(AttrDict[Any]):
|
|
6351
6519
|
"""
|
|
6352
6520
|
Result of the `rare_terms` aggregation when the field is a string.
|
|
@@ -6578,6 +6746,18 @@ class TotalHits(AttrDict[Any]):
|
|
|
6578
6746
|
value: int
|
|
6579
6747
|
|
|
6580
6748
|
|
|
6749
|
+
class TrendChange(AttrDict[Any]):
|
|
6750
|
+
"""
|
|
6751
|
+
:arg p_value: (required)
|
|
6752
|
+
:arg r_value: (required)
|
|
6753
|
+
:arg change_point: (required)
|
|
6754
|
+
"""
|
|
6755
|
+
|
|
6756
|
+
p_value: float
|
|
6757
|
+
r_value: float
|
|
6758
|
+
change_point: int
|
|
6759
|
+
|
|
6760
|
+
|
|
6581
6761
|
class UnmappedRareTermsAggregate(AttrDict[Any]):
|
|
6582
6762
|
"""
|
|
6583
6763
|
Result of a `rare_terms` aggregation when the field is unmapped.
|
elasticsearch9/dsl/utils.py
CHANGED
|
@@ -44,8 +44,7 @@ from .exceptions import UnknownDslObject, ValidationException
|
|
|
44
44
|
if TYPE_CHECKING:
|
|
45
45
|
from elastic_transport import ObjectApiResponse
|
|
46
46
|
|
|
47
|
-
from
|
|
48
|
-
|
|
47
|
+
from .. import AsyncElasticsearch, Elasticsearch
|
|
49
48
|
from .document_base import DocumentOptions
|
|
50
49
|
from .field import Field
|
|
51
50
|
from .index_base import IndexBase
|
elasticsearch9/esql/esql.py
CHANGED
elasticsearch9/esql/functions.py
CHANGED
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
import json
|
|
19
19
|
from typing import Any
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
21
|
+
from ..dsl.document_base import InstrumentedExpression
|
|
22
|
+
from ..esql.esql import ESQLBase, ExpressionType
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def _render(v: Any) -> str:
|
|
@@ -19,12 +19,21 @@ from .._async.helpers import async_bulk, async_reindex, async_scan, async_stream
|
|
|
19
19
|
from .._utils import fixup_module_metadata
|
|
20
20
|
from .actions import _chunk_actions # noqa: F401
|
|
21
21
|
from .actions import _process_bulk_chunk # noqa: F401
|
|
22
|
-
from .actions import
|
|
22
|
+
from .actions import (
|
|
23
|
+
BULK_FLUSH,
|
|
24
|
+
bulk,
|
|
25
|
+
expand_action,
|
|
26
|
+
parallel_bulk,
|
|
27
|
+
reindex,
|
|
28
|
+
scan,
|
|
29
|
+
streaming_bulk,
|
|
30
|
+
)
|
|
23
31
|
from .errors import BulkIndexError, ScanError
|
|
24
32
|
|
|
25
33
|
__all__ = [
|
|
26
34
|
"BulkIndexError",
|
|
27
35
|
"ScanError",
|
|
36
|
+
"BULK_FLUSH",
|
|
28
37
|
"expand_action",
|
|
29
38
|
"streaming_bulk",
|
|
30
39
|
"bulk",
|
|
@@ -16,9 +16,10 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
18
|
import logging
|
|
19
|
+
import queue
|
|
19
20
|
import time
|
|
21
|
+
from enum import Enum
|
|
20
22
|
from operator import methodcaller
|
|
21
|
-
from queue import Queue
|
|
22
23
|
from typing import (
|
|
23
24
|
Any,
|
|
24
25
|
Callable,
|
|
@@ -37,13 +38,21 @@ from typing import (
|
|
|
37
38
|
from elastic_transport import OpenTelemetrySpan
|
|
38
39
|
|
|
39
40
|
from .. import Elasticsearch
|
|
40
|
-
from ..compat import to_bytes
|
|
41
|
+
from ..compat import safe_thread, to_bytes
|
|
41
42
|
from ..exceptions import ApiError, NotFoundError, TransportError
|
|
42
43
|
from ..serializer import Serializer
|
|
43
44
|
from .errors import BulkIndexError, ScanError
|
|
44
45
|
|
|
45
46
|
logger = logging.getLogger("elasticsearch.helpers")
|
|
46
47
|
|
|
48
|
+
|
|
49
|
+
class BulkMeta(Enum):
|
|
50
|
+
flush = 1
|
|
51
|
+
done = 2
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
BULK_FLUSH = BulkMeta.flush
|
|
55
|
+
|
|
47
56
|
_TYPE_BULK_ACTION = Union[bytes, str, Dict[str, Any]]
|
|
48
57
|
_TYPE_BULK_ACTION_HEADER = Dict[str, Any]
|
|
49
58
|
_TYPE_BULK_ACTION_BODY = Union[None, bytes, Dict[str, Any]]
|
|
@@ -51,6 +60,13 @@ _TYPE_BULK_ACTION_HEADER_AND_BODY = Tuple[
|
|
|
51
60
|
_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY
|
|
52
61
|
]
|
|
53
62
|
|
|
63
|
+
_TYPE_BULK_ACTION_WITH_META = Union[bytes, str, Dict[str, Any], BulkMeta]
|
|
64
|
+
_TYPE_BULK_ACTION_HEADER_WITH_META = Union[Dict[str, Any], BulkMeta]
|
|
65
|
+
_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY = Union[
|
|
66
|
+
Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
|
|
67
|
+
Tuple[BulkMeta, Any],
|
|
68
|
+
]
|
|
69
|
+
|
|
54
70
|
|
|
55
71
|
def expand_action(data: _TYPE_BULK_ACTION) -> _TYPE_BULK_ACTION_HEADER_AND_BODY:
|
|
56
72
|
"""
|
|
@@ -139,7 +155,9 @@ class _ActionChunker:
|
|
|
139
155
|
] = []
|
|
140
156
|
|
|
141
157
|
def feed(
|
|
142
|
-
self,
|
|
158
|
+
self,
|
|
159
|
+
action: _TYPE_BULK_ACTION_HEADER_WITH_META,
|
|
160
|
+
data: _TYPE_BULK_ACTION_BODY,
|
|
143
161
|
) -> Optional[
|
|
144
162
|
Tuple[
|
|
145
163
|
List[
|
|
@@ -152,23 +170,25 @@ class _ActionChunker:
|
|
|
152
170
|
]
|
|
153
171
|
]:
|
|
154
172
|
ret = None
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
173
|
+
action_bytes = b""
|
|
174
|
+
data_bytes: Optional[bytes] = None
|
|
175
|
+
cur_size = 0
|
|
176
|
+
if not isinstance(action, BulkMeta):
|
|
177
|
+
action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
|
|
178
|
+
# +1 to account for the trailing new line character
|
|
179
|
+
cur_size = len(action_bytes) + 1
|
|
180
|
+
|
|
181
|
+
if data is not None:
|
|
182
|
+
data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
|
|
183
|
+
cur_size += len(data_bytes) + 1
|
|
184
|
+
else:
|
|
185
|
+
data_bytes = None
|
|
167
186
|
|
|
168
187
|
# full chunk, send it and start a new one
|
|
169
188
|
if self.bulk_actions and (
|
|
170
189
|
self.size + cur_size > self.max_chunk_bytes
|
|
171
190
|
or self.action_count == self.chunk_size
|
|
191
|
+
or (action == BulkMeta.flush and self.bulk_actions)
|
|
172
192
|
):
|
|
173
193
|
ret = (self.bulk_data, self.bulk_actions)
|
|
174
194
|
self.bulk_actions = []
|
|
@@ -176,15 +196,16 @@ class _ActionChunker:
|
|
|
176
196
|
self.size = 0
|
|
177
197
|
self.action_count = 0
|
|
178
198
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
199
|
+
if not isinstance(action, BulkMeta):
|
|
200
|
+
self.bulk_actions.append(action_bytes)
|
|
201
|
+
if data_bytes is not None:
|
|
202
|
+
self.bulk_actions.append(data_bytes)
|
|
203
|
+
self.bulk_data.append((action, data))
|
|
204
|
+
else:
|
|
205
|
+
self.bulk_data.append((action,))
|
|
185
206
|
|
|
186
|
-
|
|
187
|
-
|
|
207
|
+
self.size += cur_size
|
|
208
|
+
self.action_count += 1
|
|
188
209
|
return ret
|
|
189
210
|
|
|
190
211
|
def flush(
|
|
@@ -209,9 +230,10 @@ class _ActionChunker:
|
|
|
209
230
|
|
|
210
231
|
|
|
211
232
|
def _chunk_actions(
|
|
212
|
-
actions: Iterable[
|
|
233
|
+
actions: Iterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
|
|
213
234
|
chunk_size: int,
|
|
214
235
|
max_chunk_bytes: int,
|
|
236
|
+
flush_after_seconds: Optional[float],
|
|
215
237
|
serializer: Serializer,
|
|
216
238
|
) -> Iterable[
|
|
217
239
|
Tuple[
|
|
@@ -231,10 +253,41 @@ def _chunk_actions(
|
|
|
231
253
|
chunker = _ActionChunker(
|
|
232
254
|
chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
|
|
233
255
|
)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
256
|
+
|
|
257
|
+
if not flush_after_seconds:
|
|
258
|
+
for action, data in actions:
|
|
259
|
+
ret = chunker.feed(action, data)
|
|
260
|
+
if ret:
|
|
261
|
+
yield ret
|
|
262
|
+
else:
|
|
263
|
+
item_queue: queue.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
|
|
264
|
+
queue.Queue()
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
def get_items() -> None:
|
|
268
|
+
try:
|
|
269
|
+
for item in actions:
|
|
270
|
+
item_queue.put(item)
|
|
271
|
+
finally:
|
|
272
|
+
# make sure we signal the end even if there is an exception
|
|
273
|
+
item_queue.put((BulkMeta.done, None))
|
|
274
|
+
|
|
275
|
+
with safe_thread(get_items):
|
|
276
|
+
timeout: Optional[float] = flush_after_seconds
|
|
277
|
+
while True:
|
|
278
|
+
try:
|
|
279
|
+
action, data = item_queue.get(timeout=timeout)
|
|
280
|
+
timeout = flush_after_seconds
|
|
281
|
+
except queue.Empty:
|
|
282
|
+
action, data = BulkMeta.flush, None
|
|
283
|
+
timeout = None
|
|
284
|
+
|
|
285
|
+
if action is BulkMeta.done:
|
|
286
|
+
break
|
|
287
|
+
ret = chunker.feed(action, data)
|
|
288
|
+
if ret:
|
|
289
|
+
yield ret
|
|
290
|
+
|
|
238
291
|
ret = chunker.flush()
|
|
239
292
|
if ret:
|
|
240
293
|
yield ret
|
|
@@ -361,9 +414,10 @@ def _process_bulk_chunk(
|
|
|
361
414
|
|
|
362
415
|
def streaming_bulk(
|
|
363
416
|
client: Elasticsearch,
|
|
364
|
-
actions: Iterable[
|
|
417
|
+
actions: Iterable[_TYPE_BULK_ACTION_WITH_META],
|
|
365
418
|
chunk_size: int = 500,
|
|
366
419
|
max_chunk_bytes: int = 100 * 1024 * 1024,
|
|
420
|
+
flush_after_seconds: Optional[float] = None,
|
|
367
421
|
raise_on_error: bool = True,
|
|
368
422
|
expand_action_callback: Callable[
|
|
369
423
|
[_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
|
|
@@ -397,6 +451,9 @@ def streaming_bulk(
|
|
|
397
451
|
:arg actions: iterable containing the actions to be executed
|
|
398
452
|
:arg chunk_size: number of docs in one chunk sent to es (default: 500)
|
|
399
453
|
:arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
|
|
454
|
+
:arg flush_after_seconds: time in seconds after which a chunk is written even
|
|
455
|
+
if hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to not use a
|
|
456
|
+
timeout-based flush. (default: 0)
|
|
400
457
|
:arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
|
|
401
458
|
from the execution of the last chunk when some occur. By default we raise.
|
|
402
459
|
:arg raise_on_exception: if ``False`` then don't propagate exceptions from
|
|
@@ -425,6 +482,13 @@ def streaming_bulk(
|
|
|
425
482
|
|
|
426
483
|
serializer = client.transport.serializers.get_serializer("application/json")
|
|
427
484
|
|
|
485
|
+
def expand_action_with_meta(
|
|
486
|
+
data: _TYPE_BULK_ACTION_WITH_META,
|
|
487
|
+
) -> _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY:
|
|
488
|
+
if isinstance(data, BulkMeta):
|
|
489
|
+
return data, None
|
|
490
|
+
return expand_action_callback(data)
|
|
491
|
+
|
|
428
492
|
bulk_data: List[
|
|
429
493
|
Union[
|
|
430
494
|
Tuple[_TYPE_BULK_ACTION_HEADER],
|
|
@@ -433,9 +497,10 @@ def streaming_bulk(
|
|
|
433
497
|
]
|
|
434
498
|
bulk_actions: List[bytes]
|
|
435
499
|
for bulk_data, bulk_actions in _chunk_actions(
|
|
436
|
-
map(
|
|
500
|
+
map(expand_action_with_meta, actions),
|
|
437
501
|
chunk_size,
|
|
438
502
|
max_chunk_bytes,
|
|
503
|
+
flush_after_seconds,
|
|
439
504
|
serializer,
|
|
440
505
|
):
|
|
441
506
|
for attempt in range(max_retries + 1):
|
|
@@ -557,6 +622,7 @@ def parallel_bulk(
|
|
|
557
622
|
thread_count: int = 4,
|
|
558
623
|
chunk_size: int = 500,
|
|
559
624
|
max_chunk_bytes: int = 100 * 1024 * 1024,
|
|
625
|
+
flush_after_seconds: Optional[float] = None,
|
|
560
626
|
queue_size: int = 4,
|
|
561
627
|
expand_action_callback: Callable[
|
|
562
628
|
[_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
|
|
@@ -573,6 +639,9 @@ def parallel_bulk(
|
|
|
573
639
|
:arg thread_count: size of the threadpool to use for the bulk requests
|
|
574
640
|
:arg chunk_size: number of docs in one chunk sent to es (default: 500)
|
|
575
641
|
:arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
|
|
642
|
+
:arg flush_after_seconds: time in seconds after which a chunk is written even
|
|
643
|
+
if hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to not use a
|
|
644
|
+
timeout-based flush. (default: 0)
|
|
576
645
|
:arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
|
|
577
646
|
from the execution of the last chunk when some occur. By default we raise.
|
|
578
647
|
:arg raise_on_exception: if ``False`` then don't propagate exceptions from
|
|
@@ -596,7 +665,7 @@ def parallel_bulk(
|
|
|
596
665
|
super()._setup_queues() # type: ignore[misc]
|
|
597
666
|
# The queue must be at least the size of the number of threads to
|
|
598
667
|
# prevent hanging when inserting sentinel values during teardown.
|
|
599
|
-
self._inqueue: Queue[
|
|
668
|
+
self._inqueue: queue.Queue[
|
|
600
669
|
Tuple[
|
|
601
670
|
List[
|
|
602
671
|
Union[
|
|
@@ -605,7 +674,7 @@ def parallel_bulk(
|
|
|
605
674
|
],
|
|
606
675
|
List[bytes],
|
|
607
676
|
]
|
|
608
|
-
] = Queue(max(queue_size, thread_count))
|
|
677
|
+
] = queue.Queue(max(queue_size, thread_count))
|
|
609
678
|
self._quick_put = self._inqueue.put
|
|
610
679
|
|
|
611
680
|
with client._otel.helpers_span("helpers.parallel_bulk") as otel_span:
|
|
@@ -625,7 +694,11 @@ def parallel_bulk(
|
|
|
625
694
|
)
|
|
626
695
|
),
|
|
627
696
|
_chunk_actions(
|
|
628
|
-
expanded_actions,
|
|
697
|
+
expanded_actions,
|
|
698
|
+
chunk_size,
|
|
699
|
+
max_chunk_bytes,
|
|
700
|
+
flush_after_seconds,
|
|
701
|
+
serializer,
|
|
629
702
|
),
|
|
630
703
|
):
|
|
631
704
|
yield from result
|
|
@@ -15,31 +15,31 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
from
|
|
18
|
+
from ...helpers.vectorstore._async.embedding_service import (
|
|
19
19
|
AsyncElasticsearchEmbeddings,
|
|
20
20
|
AsyncEmbeddingService,
|
|
21
21
|
)
|
|
22
|
-
from
|
|
22
|
+
from ...helpers.vectorstore._async.strategies import (
|
|
23
23
|
AsyncBM25Strategy,
|
|
24
24
|
AsyncDenseVectorScriptScoreStrategy,
|
|
25
25
|
AsyncDenseVectorStrategy,
|
|
26
26
|
AsyncRetrievalStrategy,
|
|
27
27
|
AsyncSparseVectorStrategy,
|
|
28
28
|
)
|
|
29
|
-
from
|
|
30
|
-
from
|
|
29
|
+
from ...helpers.vectorstore._async.vectorstore import AsyncVectorStore
|
|
30
|
+
from ...helpers.vectorstore._sync.embedding_service import (
|
|
31
31
|
ElasticsearchEmbeddings,
|
|
32
32
|
EmbeddingService,
|
|
33
33
|
)
|
|
34
|
-
from
|
|
34
|
+
from ...helpers.vectorstore._sync.strategies import (
|
|
35
35
|
BM25Strategy,
|
|
36
36
|
DenseVectorScriptScoreStrategy,
|
|
37
37
|
DenseVectorStrategy,
|
|
38
38
|
RetrievalStrategy,
|
|
39
39
|
SparseVectorStrategy,
|
|
40
40
|
)
|
|
41
|
-
from
|
|
42
|
-
from
|
|
41
|
+
from ...helpers.vectorstore._sync.vectorstore import VectorStore
|
|
42
|
+
from ...helpers.vectorstore._utils import DistanceMetric
|
|
43
43
|
|
|
44
44
|
__all__ = [
|
|
45
45
|
"AsyncBM25Strategy",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
from
|
|
18
|
+
from .... import AsyncElasticsearch, BadRequestError, NotFoundError
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
async def model_must_be_deployed(client: AsyncElasticsearch, model_id: str) -> None:
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
from abc import ABC, abstractmethod
|
|
19
19
|
from typing import List
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
21
|
+
from .... import AsyncElasticsearch
|
|
22
|
+
from ...._version import __versionstr__ as lib_version
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class AsyncEmbeddingService(ABC):
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
from abc import ABC, abstractmethod
|
|
19
19
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
21
|
+
from .... import AsyncElasticsearch
|
|
22
|
+
from ....helpers.vectorstore._async._utils import model_must_be_deployed
|
|
23
|
+
from ....helpers.vectorstore._utils import DistanceMetric
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class AsyncRetrievalStrategy(ABC):
|