elasticsearch-9.1.1-py3-none-any.whl → elasticsearch-9.1.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
Files changed (124)
  1. elasticsearch/_async/client/__init__.py +69 -65
  2. elasticsearch/_async/client/async_search.py +3 -3
  3. elasticsearch/_async/client/autoscaling.py +8 -4
  4. elasticsearch/_async/client/cat.py +521 -27
  5. elasticsearch/_async/client/ccr.py +10 -10
  6. elasticsearch/_async/client/cluster.py +34 -33
  7. elasticsearch/_async/client/connector.py +45 -44
  8. elasticsearch/_async/client/dangling_indices.py +8 -12
  9. elasticsearch/_async/client/enrich.py +10 -10
  10. elasticsearch/_async/client/eql.py +10 -10
  11. elasticsearch/_async/client/esql.py +16 -16
  12. elasticsearch/_async/client/features.py +6 -6
  13. elasticsearch/_async/client/fleet.py +8 -12
  14. elasticsearch/_async/client/graph.py +3 -7
  15. elasticsearch/_async/client/ilm.py +20 -28
  16. elasticsearch/_async/client/indices.py +163 -169
  17. elasticsearch/_async/client/inference.py +41 -127
  18. elasticsearch/_async/client/ingest.py +9 -9
  19. elasticsearch/_async/client/license.py +5 -7
  20. elasticsearch/_async/client/logstash.py +7 -5
  21. elasticsearch/_async/client/migration.py +6 -6
  22. elasticsearch/_async/client/ml.py +125 -85
  23. elasticsearch/_async/client/monitoring.py +4 -3
  24. elasticsearch/_async/client/nodes.py +17 -17
  25. elasticsearch/_async/client/query_rules.py +16 -16
  26. elasticsearch/_async/client/rollup.py +21 -21
  27. elasticsearch/_async/client/search_application.py +19 -19
  28. elasticsearch/_async/client/searchable_snapshots.py +10 -10
  29. elasticsearch/_async/client/security.py +8 -7
  30. elasticsearch/_async/client/shutdown.py +14 -19
  31. elasticsearch/_async/client/simulate.py +4 -4
  32. elasticsearch/_async/client/slm.py +18 -22
  33. elasticsearch/_async/client/snapshot.py +20 -20
  34. elasticsearch/_async/client/sql.py +10 -10
  35. elasticsearch/_async/client/streams.py +186 -0
  36. elasticsearch/_async/client/synonyms.py +10 -10
  37. elasticsearch/_async/client/tasks.py +8 -8
  38. elasticsearch/_async/client/text_structure.py +13 -9
  39. elasticsearch/_async/client/transform.py +51 -12
  40. elasticsearch/_async/client/utils.py +4 -2
  41. elasticsearch/_async/client/watcher.py +27 -31
  42. elasticsearch/_async/client/xpack.py +6 -5
  43. elasticsearch/_async/helpers.py +58 -9
  44. elasticsearch/_sync/client/__init__.py +71 -65
  45. elasticsearch/_sync/client/async_search.py +3 -3
  46. elasticsearch/_sync/client/autoscaling.py +8 -4
  47. elasticsearch/_sync/client/cat.py +521 -27
  48. elasticsearch/_sync/client/ccr.py +10 -10
  49. elasticsearch/_sync/client/cluster.py +34 -33
  50. elasticsearch/_sync/client/connector.py +45 -44
  51. elasticsearch/_sync/client/dangling_indices.py +8 -12
  52. elasticsearch/_sync/client/enrich.py +10 -10
  53. elasticsearch/_sync/client/eql.py +10 -10
  54. elasticsearch/_sync/client/esql.py +16 -16
  55. elasticsearch/_sync/client/features.py +6 -6
  56. elasticsearch/_sync/client/fleet.py +8 -12
  57. elasticsearch/_sync/client/graph.py +3 -7
  58. elasticsearch/_sync/client/ilm.py +20 -28
  59. elasticsearch/_sync/client/indices.py +163 -169
  60. elasticsearch/_sync/client/inference.py +41 -127
  61. elasticsearch/_sync/client/ingest.py +9 -9
  62. elasticsearch/_sync/client/license.py +5 -7
  63. elasticsearch/_sync/client/logstash.py +7 -5
  64. elasticsearch/_sync/client/migration.py +6 -6
  65. elasticsearch/_sync/client/ml.py +125 -85
  66. elasticsearch/_sync/client/monitoring.py +4 -3
  67. elasticsearch/_sync/client/nodes.py +17 -17
  68. elasticsearch/_sync/client/query_rules.py +16 -16
  69. elasticsearch/_sync/client/rollup.py +21 -21
  70. elasticsearch/_sync/client/search_application.py +19 -19
  71. elasticsearch/_sync/client/searchable_snapshots.py +10 -10
  72. elasticsearch/_sync/client/security.py +8 -7
  73. elasticsearch/_sync/client/shutdown.py +14 -19
  74. elasticsearch/_sync/client/simulate.py +4 -4
  75. elasticsearch/_sync/client/slm.py +18 -22
  76. elasticsearch/_sync/client/snapshot.py +20 -20
  77. elasticsearch/_sync/client/sql.py +10 -10
  78. elasticsearch/_sync/client/streams.py +186 -0
  79. elasticsearch/_sync/client/synonyms.py +10 -10
  80. elasticsearch/_sync/client/tasks.py +8 -8
  81. elasticsearch/_sync/client/text_structure.py +13 -9
  82. elasticsearch/_sync/client/transform.py +51 -12
  83. elasticsearch/_sync/client/utils.py +16 -2
  84. elasticsearch/_sync/client/watcher.py +27 -31
  85. elasticsearch/_sync/client/xpack.py +6 -5
  86. elasticsearch/_version.py +2 -1
  87. elasticsearch/client.py +2 -0
  88. elasticsearch/compat.py +43 -1
  89. elasticsearch/dsl/__init__.py +28 -0
  90. elasticsearch/dsl/_async/document.py +4 -5
  91. elasticsearch/dsl/_async/index.py +1 -1
  92. elasticsearch/dsl/_async/search.py +2 -3
  93. elasticsearch/dsl/_sync/document.py +4 -5
  94. elasticsearch/dsl/_sync/index.py +1 -1
  95. elasticsearch/dsl/_sync/search.py +2 -3
  96. elasticsearch/dsl/aggs.py +100 -3
  97. elasticsearch/dsl/async_connections.py +1 -2
  98. elasticsearch/dsl/connections.py +1 -2
  99. elasticsearch/dsl/document_base.py +15 -0
  100. elasticsearch/dsl/field.py +12 -1
  101. elasticsearch/dsl/query.py +23 -0
  102. elasticsearch/dsl/response/__init__.py +3 -0
  103. elasticsearch/dsl/serializer.py +1 -2
  104. elasticsearch/dsl/types.py +185 -5
  105. elasticsearch/dsl/utils.py +1 -2
  106. elasticsearch/esql/esql.py +1 -1
  107. elasticsearch/esql/functions.py +2 -2
  108. elasticsearch/helpers/__init__.py +10 -1
  109. elasticsearch/helpers/actions.py +106 -33
  110. elasticsearch/helpers/vectorstore/__init__.py +7 -7
  111. elasticsearch/helpers/vectorstore/_async/_utils.py +1 -1
  112. elasticsearch/helpers/vectorstore/_async/embedding_service.py +2 -2
  113. elasticsearch/helpers/vectorstore/_async/strategies.py +3 -3
  114. elasticsearch/helpers/vectorstore/_async/vectorstore.py +5 -5
  115. elasticsearch/helpers/vectorstore/_sync/_utils.py +1 -1
  116. elasticsearch/helpers/vectorstore/_sync/embedding_service.py +2 -2
  117. elasticsearch/helpers/vectorstore/_sync/strategies.py +3 -3
  118. elasticsearch/helpers/vectorstore/_sync/vectorstore.py +5 -5
  119. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.3.dist-info}/METADATA +2 -2
  120. elasticsearch-9.1.3.dist-info/RECORD +165 -0
  121. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.3.dist-info}/WHEEL +1 -1
  122. elasticsearch-9.1.1.dist-info/RECORD +0 -163
  123. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.3.dist-info}/licenses/LICENSE +0 -0
  124. {elasticsearch-9.1.1.dist-info → elasticsearch-9.1.3.dist-info}/licenses/NOTICE +0 -0
@@ -938,6 +938,7 @@ class GeoDistanceSort(AttrDict[Any]):
             Dict[str, Any],
             "DefaultType",
         ] = DEFAULT,
+        /,
         *,
         mode: Union[
             Literal["min", "max", "sum", "avg", "median"], DefaultType
@@ -2774,6 +2775,31 @@ class NumericFielddata(AttrDict[Any]):
         super().__init__(kwargs)
 
 
+class PValueHeuristic(AttrDict[Any]):
+    """
+    :arg background_is_superset:
+    :arg normalize_above: Should the results be normalized when above the
+        given value. Allows for consistent significance results at various
+        scales. Note: `0` is a special value which means no normalization
+    """
+
+    background_is_superset: Union[bool, DefaultType]
+    normalize_above: Union[int, DefaultType]
+
+    def __init__(
+        self,
+        *,
+        background_is_superset: Union[bool, DefaultType] = DEFAULT,
+        normalize_above: Union[int, DefaultType] = DEFAULT,
+        **kwargs: Any,
+    ):
+        if background_is_superset is not DEFAULT:
+            kwargs["background_is_superset"] = background_is_superset
+        if normalize_above is not DEFAULT:
+            kwargs["normalize_above"] = normalize_above
+        super().__init__(kwargs)
+
+
 class PercentageScoreHeuristic(AttrDict[Any]):
     pass
 
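
For orientation, a minimal sketch of constructing the new `PValueHeuristic` (assuming it is exported from `elasticsearch.dsl.types` like the neighboring heuristic classes; the argument values are illustrative):

```python
from elasticsearch.dsl.types import PValueHeuristic

# Only explicitly-passed arguments are copied into the underlying AttrDict;
# normalize_above=0 is the documented special value meaning "no normalization".
heuristic = PValueHeuristic(background_is_superset=True, normalize_above=0)
```
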
@@ -3219,6 +3245,7 @@ class SortOptions(AttrDict[Any]):
         self,
         _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         _value: Union["FieldSort", Dict[str, Any], "DefaultType"] = DEFAULT,
+        /,
         *,
         _score: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT,
         _doc: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT,
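
The `/` added here and in `GeoDistanceSort.__init__` above makes the leading parameters positional-only. Because these initializers also accept `**kwargs`, this is a behavioral change, not just a signature cleanup, as this sketch illustrates (the field name is illustrative):

```python
from elasticsearch.dsl.types import SortOptions

# Binds the positional-only _field and _value parameters:
opts = SortOptions("publish_date", {"order": "desc"})

# After this change, a keyword spelling such as SortOptions(_field="publish_date")
# no longer binds _field; Python routes the name into **kwargs instead, where it
# is stored as a literal "_field" key.
```
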
@@ -4009,24 +4036,25 @@ class TestPopulation(AttrDict[Any]):
 
 class TextEmbedding(AttrDict[Any]):
     """
-    :arg model_id: (required)
     :arg model_text: (required)
+    :arg model_id: Model ID is required for all dense_vector fields but
+        may be inferred for semantic_text fields
     """
 
-    model_id: Union[str, DefaultType]
     model_text: Union[str, DefaultType]
+    model_id: Union[str, DefaultType]
 
     def __init__(
         self,
         *,
-        model_id: Union[str, DefaultType] = DEFAULT,
         model_text: Union[str, DefaultType] = DEFAULT,
+        model_id: Union[str, DefaultType] = DEFAULT,
         **kwargs: Any,
     ):
-        if model_id is not DEFAULT:
-            kwargs["model_id"] = model_id
         if model_text is not DEFAULT:
            kwargs["model_text"] = model_text
+        if model_id is not DEFAULT:
+            kwargs["model_id"] = model_id
         super().__init__(kwargs)
 
 
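
Since `model_text` and `model_id` sit after the bare `*`, they are keyword-only, so reordering them cannot break call sites; the substantive change is that `model_id` is no longer documented as required. A sketch (model names are illustrative):

```python
from elasticsearch.dsl.types import TextEmbedding

# Unchanged call style for dense_vector fields, where model_id stays required:
TextEmbedding(model_text="find similar articles", model_id="my-embedding-model")

# Now permissible for semantic_text fields, where the model may be inferred:
TextEmbedding(model_text="find similar articles")
```
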
@@ -4659,6 +4687,82 @@ class CardinalityAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class CartesianBoundsAggregate(AttrDict[Any]):
+    """
+    :arg bounds:
+    :arg meta:
+    """
+
+    bounds: "TopLeftBottomRightGeoBounds"
+    meta: Mapping[str, Any]
+
+
+class CartesianCentroidAggregate(AttrDict[Any]):
+    """
+    :arg count: (required)
+    :arg location:
+    :arg meta:
+    """
+
+    count: int
+    location: "CartesianPoint"
+    meta: Mapping[str, Any]
+
+
+class CartesianPoint(AttrDict[Any]):
+    """
+    :arg x: (required)
+    :arg y: (required)
+    """
+
+    x: float
+    y: float
+
+
+class ChangePointAggregate(AttrDict[Any]):
+    """
+    :arg type: (required)
+    :arg bucket:
+    :arg meta:
+    """
+
+    type: "ChangeType"
+    bucket: "ChangePointBucket"
+    meta: Mapping[str, Any]
+
+
+class ChangePointBucket(AttrDict[Any]):
+    """
+    :arg key: (required)
+    :arg doc_count: (required)
+    """
+
+    key: Union[int, float, str, bool, None]
+    doc_count: int
+
+
+class ChangeType(AttrDict[Any]):
+    """
+    :arg dip:
+    :arg distribution_change:
+    :arg indeterminable:
+    :arg non_stationary:
+    :arg spike:
+    :arg stationary:
+    :arg step_change:
+    :arg trend_change:
+    """
+
+    dip: "Dip"
+    distribution_change: "DistributionChange"
+    indeterminable: "Indeterminable"
+    non_stationary: "NonStationary"
+    spike: "Spike"
+    stationary: "Stationary"
+    step_change: "StepChange"
+    trend_change: "TrendChange"
+
+
 class ChildrenAggregate(AttrDict[Any]):
     """
     :arg doc_count: (required)
@@ -4936,6 +5040,26 @@ class DfsStatisticsProfile(AttrDict[Any]):
     children: Sequence["DfsStatisticsProfile"]
 
 
+class Dip(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
+class DistributionChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class DoubleTermsAggregate(AttrDict[Any]):
     """
     Result of a `terms` aggregation when the field is some kind of decimal
@@ -5497,6 +5621,14 @@ class HitsMetadata(AttrDict[Any]):
     max_score: Union[float, None]
 
 
+class Indeterminable(AttrDict[Any]):
+    """
+    :arg reason: (required)
+    """
+
+    reason: str
+
+
 class InferenceAggregate(AttrDict[Any]):
     """
     :arg value:
@@ -5899,6 +6031,18 @@ class NestedIdentity(AttrDict[Any]):
     _nested: "NestedIdentity"
 
 
+class NonStationary(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg r_value: (required)
+    :arg trend: (required)
+    """
+
+    p_value: float
+    r_value: float
+    trend: str
+
+
 class ParentAggregate(AttrDict[Any]):
     """
     :arg doc_count: (required)
@@ -6256,6 +6400,16 @@ class SimpleValueAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class Spike(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class StandardDeviationBounds(AttrDict[Any]):
     """
     :arg upper: (required)
@@ -6292,6 +6446,10 @@ class StandardDeviationBoundsAsString(AttrDict[Any]):
     lower_sampling: str
 
 
+class Stationary(AttrDict[Any]):
+    pass
+
+
 class StatsAggregate(AttrDict[Any]):
     """
     Statistics aggregation result. `min`, `max` and `avg` are missing if
@@ -6347,6 +6505,16 @@ class StatsBucketAggregate(AttrDict[Any]):
     meta: Mapping[str, Any]
 
 
+class StepChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    change_point: int
+
+
 class StringRareTermsAggregate(AttrDict[Any]):
     """
     Result of the `rare_terms` aggregation when the field is a string.
@@ -6578,6 +6746,18 @@ class TotalHits(AttrDict[Any]):
     value: int
 
 
+class TrendChange(AttrDict[Any]):
+    """
+    :arg p_value: (required)
+    :arg r_value: (required)
+    :arg change_point: (required)
+    """
+
+    p_value: float
+    r_value: float
+    change_point: int
+
+
 class UnmappedRareTermsAggregate(AttrDict[Any]):
     """
     Result of a `rare_terms` aggregation when the field is unmapped.
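
Taken together, the classes added in the hunks above (`ChangePointAggregate`, `ChangePointBucket`, `ChangeType`, `Dip`, `DistributionChange`, `Indeterminable`, `NonStationary`, `Spike`, `Stationary`, `StepChange`, `TrendChange`) type the response of a `change_point` aggregation, with `ChangeType` holding at most one populated descriptor. A hedged sketch of reading such a response (the aggregation name `"changes"` and the surrounding search are assumptions, not part of this diff):

```python
# resp is assumed to be a DSL search response whose request included a
# change_point pipeline aggregation named "changes".
cp = resp.aggregations.changes
if "spike" in cp.type:
    print("spike at bucket", cp.type.spike.change_point, "p =", cp.type.spike.p_value)
elif "indeterminable" in cp.type:
    print("no change point:", cp.type.indeterminable.reason)
```
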
@@ -44,8 +44,7 @@ from .exceptions import UnknownDslObject, ValidationException
 if TYPE_CHECKING:
     from elastic_transport import ObjectApiResponse
 
-    from elasticsearch import AsyncElasticsearch, Elasticsearch
-
+    from .. import AsyncElasticsearch, Elasticsearch
     from .document_base import DocumentOptions
     from .field import Field
     from .index_base import IndexBase
@@ -124,7 +124,7 @@ class ESQLBase(ABC):
         if re.fullmatch(r"[a-zA-Z_@][a-zA-Z0-9_\.]*", s):
             return s
         # this identifier needs to be escaped
-        s.replace("`", "``")
+        s = s.replace("`", "``")
         return f"`{s}`"
 
     @staticmethod
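
This one-liner fixes a classic Python immutability bug: `str.replace` returns a new string rather than mutating in place, so the escaped result was previously computed and discarded. In isolation:

```python
s = "weird`identifier"
s.replace("`", "``")      # old code: return value thrown away, s unchanged
assert s == "weird`identifier"

s = s.replace("`", "``")  # fixed code: rebind s to the escaped copy
assert s == "weird``identifier"
```
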
@@ -18,8 +18,8 @@
 import json
 from typing import Any
 
-from elasticsearch.dsl.document_base import InstrumentedExpression
-from elasticsearch.esql.esql import ESQLBase, ExpressionType
+from ..dsl.document_base import InstrumentedExpression
+from ..esql.esql import ESQLBase, ExpressionType
 
 
 def _render(v: Any) -> str:
@@ -19,12 +19,21 @@ from .._async.helpers import async_bulk, async_reindex, async_scan, async_stream
 from .._utils import fixup_module_metadata
 from .actions import _chunk_actions  # noqa: F401
 from .actions import _process_bulk_chunk  # noqa: F401
-from .actions import bulk, expand_action, parallel_bulk, reindex, scan, streaming_bulk
+from .actions import (
+    BULK_FLUSH,
+    bulk,
+    expand_action,
+    parallel_bulk,
+    reindex,
+    scan,
+    streaming_bulk,
+)
 from .errors import BulkIndexError, ScanError
 
 __all__ = [
     "BulkIndexError",
     "ScanError",
+    "BULK_FLUSH",
     "expand_action",
     "streaming_bulk",
     "bulk",
@@ -16,9 +16,10 @@
 # under the License.
 
 import logging
+import queue
 import time
+from enum import Enum
 from operator import methodcaller
-from queue import Queue
 from typing import (
     Any,
     Callable,
@@ -37,13 +38,21 @@ from typing import (
 from elastic_transport import OpenTelemetrySpan
 
 from .. import Elasticsearch
-from ..compat import to_bytes
+from ..compat import safe_thread, to_bytes
 from ..exceptions import ApiError, NotFoundError, TransportError
 from ..serializer import Serializer
 from .errors import BulkIndexError, ScanError
 
 logger = logging.getLogger("elasticsearch.helpers")
 
+
+class BulkMeta(Enum):
+    flush = 1
+    done = 2
+
+
+BULK_FLUSH = BulkMeta.flush
+
 _TYPE_BULK_ACTION = Union[bytes, str, Dict[str, Any]]
 _TYPE_BULK_ACTION_HEADER = Dict[str, Any]
 _TYPE_BULK_ACTION_BODY = Union[None, bytes, Dict[str, Any]]
@@ -51,6 +60,13 @@ _TYPE_BULK_ACTION_HEADER_AND_BODY = Tuple[
     _TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY
 ]
 
+_TYPE_BULK_ACTION_WITH_META = Union[bytes, str, Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META = Union[Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY = Union[
+    Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
+    Tuple[BulkMeta, Any],
+]
+
 
 def expand_action(data: _TYPE_BULK_ACTION) -> _TYPE_BULK_ACTION_HEADER_AND_BODY:
     """
@@ -139,7 +155,9 @@ class _ActionChunker:
     ] = []
 
     def feed(
-        self, action: _TYPE_BULK_ACTION_HEADER, data: _TYPE_BULK_ACTION_BODY
+        self,
+        action: _TYPE_BULK_ACTION_HEADER_WITH_META,
+        data: _TYPE_BULK_ACTION_BODY,
     ) -> Optional[
         Tuple[
             List[
@@ -152,23 +170,25 @@ class _ActionChunker:
         ]
     ]:
         ret = None
-        raw_action = action
-        raw_data = data
-        action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
-        # +1 to account for the trailing new line character
-        cur_size = len(action_bytes) + 1
-
-        data_bytes: Optional[bytes]
-        if data is not None:
-            data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
-            cur_size += len(data_bytes) + 1
-        else:
-            data_bytes = None
+        action_bytes = b""
+        data_bytes: Optional[bytes] = None
+        cur_size = 0
+        if not isinstance(action, BulkMeta):
+            action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
+            # +1 to account for the trailing new line character
+            cur_size = len(action_bytes) + 1
+
+            if data is not None:
+                data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
+                cur_size += len(data_bytes) + 1
+            else:
+                data_bytes = None
 
         # full chunk, send it and start a new one
         if self.bulk_actions and (
             self.size + cur_size > self.max_chunk_bytes
             or self.action_count == self.chunk_size
+            or (action == BulkMeta.flush and self.bulk_actions)
         ):
             ret = (self.bulk_data, self.bulk_actions)
             self.bulk_actions = []
@@ -176,15 +196,16 @@
             self.size = 0
             self.action_count = 0
 
-        self.bulk_actions.append(action_bytes)
-        if data_bytes is not None:
-            self.bulk_actions.append(data_bytes)
-            self.bulk_data.append((raw_action, raw_data))
-        else:
-            self.bulk_data.append((raw_action,))
+        if not isinstance(action, BulkMeta):
+            self.bulk_actions.append(action_bytes)
+            if data_bytes is not None:
+                self.bulk_actions.append(data_bytes)
+                self.bulk_data.append((action, data))
+            else:
+                self.bulk_data.append((action,))
 
-        self.size += cur_size
-        self.action_count += 1
+            self.size += cur_size
+            self.action_count += 1
         return ret
 
     def flush(
@@ -209,9 +230,10 @@
 
 
 def _chunk_actions(
-    actions: Iterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
+    actions: Iterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
     chunk_size: int,
     max_chunk_bytes: int,
+    flush_after_seconds: Optional[float],
     serializer: Serializer,
 ) -> Iterable[
     Tuple[
@@ -231,10 +253,41 @@ def _chunk_actions(
     chunker = _ActionChunker(
         chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
     )
-    for action, data in actions:
-        ret = chunker.feed(action, data)
-        if ret:
-            yield ret
+
+    if not flush_after_seconds:
+        for action, data in actions:
+            ret = chunker.feed(action, data)
+            if ret:
+                yield ret
+    else:
+        item_queue: queue.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
+            queue.Queue()
+        )
+
+        def get_items() -> None:
+            try:
+                for item in actions:
+                    item_queue.put(item)
+            finally:
+                # make sure we signal the end even if there is an exception
+                item_queue.put((BulkMeta.done, None))
+
+        with safe_thread(get_items):
+            timeout: Optional[float] = flush_after_seconds
+            while True:
+                try:
+                    action, data = item_queue.get(timeout=timeout)
+                    timeout = flush_after_seconds
+                except queue.Empty:
+                    action, data = BulkMeta.flush, None
+                    timeout = None
+
+                if action is BulkMeta.done:
+                    break
+                ret = chunker.feed(action, data)
+                if ret:
+                    yield ret
+
     ret = chunker.flush()
     if ret:
         yield ret
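
In short: when `flush_after_seconds` is falsy, the chunker consumes the iterator inline exactly as before. Otherwise a helper thread (via the new `safe_thread` context manager imported from `..compat`, which also changes in this release) drains the iterator into a `queue.Queue`; on the consumer side a `queue.Empty` timeout is translated into a synthetic `BulkMeta.flush` item, and `BulkMeta.done` marks exhaustion even when the producer raises.
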
@@ -361,9 +414,10 @@ def _process_bulk_chunk(
 
 def streaming_bulk(
     client: Elasticsearch,
-    actions: Iterable[_TYPE_BULK_ACTION],
+    actions: Iterable[_TYPE_BULK_ACTION_WITH_META],
     chunk_size: int = 500,
     max_chunk_bytes: int = 100 * 1024 * 1024,
+    flush_after_seconds: Optional[float] = None,
     raise_on_error: bool = True,
     expand_action_callback: Callable[
         [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -397,6 +451,9 @@
     :arg actions: iterable containing the actions to be executed
     :arg chunk_size: number of docs in one chunk sent to es (default: 500)
     :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to not use a
+        timeout-based flush. (default: 0)
     :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
         from the execution of the last chunk when some occur. By default we raise.
     :arg raise_on_exception: if ``False`` then don't propagate exceptions from
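
A hedged usage sketch combining the two new flushing mechanisms, the `BULK_FLUSH` sentinel and `flush_after_seconds` (the client URL, index name, and `read_events` generator are assumptions for illustration):

```python
from elasticsearch import Elasticsearch
from elasticsearch.helpers import BULK_FLUSH, streaming_bulk

client = Elasticsearch("http://localhost:9200")  # assumed local cluster

def actions():
    for event in read_events():  # hypothetical slow or bursty source
        yield {"_index": "events", "_source": event}
        if event.get("urgent"):
            yield BULK_FLUSH  # sentinel: send the current partial chunk now

# With flush_after_seconds=5.0, a partial chunk is also sent whenever no
# action arrives for five seconds, so documents are not held indefinitely.
for ok, item in streaming_bulk(client, actions(), flush_after_seconds=5.0):
    if not ok:
        print("failed:", item)
```
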
@@ -425,6 +482,13 @@
 
     serializer = client.transport.serializers.get_serializer("application/json")
 
+    def expand_action_with_meta(
+        data: _TYPE_BULK_ACTION_WITH_META,
+    ) -> _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY:
+        if isinstance(data, BulkMeta):
+            return data, None
+        return expand_action_callback(data)
+
     bulk_data: List[
         Union[
             Tuple[_TYPE_BULK_ACTION_HEADER],
@@ -433,9 +497,10 @@
         ]
     ]
     bulk_actions: List[bytes]
     for bulk_data, bulk_actions in _chunk_actions(
-        map(expand_action_callback, actions),
+        map(expand_action_with_meta, actions),
         chunk_size,
         max_chunk_bytes,
+        flush_after_seconds,
         serializer,
     ):
         for attempt in range(max_retries + 1):
@@ -557,6 +622,7 @@
     thread_count: int = 4,
     chunk_size: int = 500,
     max_chunk_bytes: int = 100 * 1024 * 1024,
+    flush_after_seconds: Optional[float] = None,
     queue_size: int = 4,
     expand_action_callback: Callable[
         [_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -573,6 +639,9 @@
     :arg thread_count: size of the threadpool to use for the bulk requests
     :arg chunk_size: number of docs in one chunk sent to es (default: 500)
     :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if hasn't reached `chunk_size` or `max_chunk_bytes`. Set to 0 to not use a
+        timeout-based flush. (default: 0)
     :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
         from the execution of the last chunk when some occur. By default we raise.
     :arg raise_on_exception: if ``False`` then don't propagate exceptions from
@@ -596,7 +665,7 @@
             super()._setup_queues()  # type: ignore[misc]
             # The queue must be at least the size of the number of threads to
             # prevent hanging when inserting sentinel values during teardown.
-            self._inqueue: Queue[
+            self._inqueue: queue.Queue[
                 Tuple[
                     List[
                         Union[
@@ -605,7 +674,7 @@
                     ],
                     List[bytes],
                 ]
-            ] = Queue(max(queue_size, thread_count))
+            ] = queue.Queue(max(queue_size, thread_count))
             self._quick_put = self._inqueue.put
 
     with client._otel.helpers_span("helpers.parallel_bulk") as otel_span:
@@ -625,7 +694,11 @@
                 )
             ),
             _chunk_actions(
-                expanded_actions, chunk_size, max_chunk_bytes, serializer
+                expanded_actions,
+                chunk_size,
+                max_chunk_bytes,
+                flush_after_seconds,
+                serializer,
             ),
         ):
             yield from result
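
The same knob threads through `parallel_bulk`, where chunking happens on the producer side before chunks are handed to the worker pool. A minimal sketch (client and action iterable as in the previous example):

```python
from elasticsearch.helpers import parallel_bulk

for ok, item in parallel_bulk(
    client,
    actions(),
    thread_count=4,
    flush_after_seconds=2.0,  # emit partial chunks after 2s without input
):
    if not ok:
        print("failed:", item)
```
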
@@ -15,31 +15,31 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from elasticsearch.helpers.vectorstore._async.embedding_service import (
+from ...helpers.vectorstore._async.embedding_service import (
     AsyncElasticsearchEmbeddings,
     AsyncEmbeddingService,
 )
-from elasticsearch.helpers.vectorstore._async.strategies import (
+from ...helpers.vectorstore._async.strategies import (
     AsyncBM25Strategy,
     AsyncDenseVectorScriptScoreStrategy,
     AsyncDenseVectorStrategy,
     AsyncRetrievalStrategy,
     AsyncSparseVectorStrategy,
 )
-from elasticsearch.helpers.vectorstore._async.vectorstore import AsyncVectorStore
-from elasticsearch.helpers.vectorstore._sync.embedding_service import (
+from ...helpers.vectorstore._async.vectorstore import AsyncVectorStore
+from ...helpers.vectorstore._sync.embedding_service import (
     ElasticsearchEmbeddings,
     EmbeddingService,
 )
-from elasticsearch.helpers.vectorstore._sync.strategies import (
+from ...helpers.vectorstore._sync.strategies import (
     BM25Strategy,
     DenseVectorScriptScoreStrategy,
     DenseVectorStrategy,
     RetrievalStrategy,
     SparseVectorStrategy,
 )
-from elasticsearch.helpers.vectorstore._sync.vectorstore import VectorStore
-from elasticsearch.helpers.vectorstore._utils import DistanceMetric
+from ...helpers.vectorstore._sync.vectorstore import VectorStore
+from ...helpers.vectorstore._utils import DistanceMetric
 
 __all__ = [
     "AsyncBM25Strategy",
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from elasticsearch import AsyncElasticsearch, BadRequestError, NotFoundError
+from .... import AsyncElasticsearch, BadRequestError, NotFoundError
 
 
 async def model_must_be_deployed(client: AsyncElasticsearch, model_id: str) -> None:
@@ -18,8 +18,8 @@
 from abc import ABC, abstractmethod
 from typing import List
 
-from elasticsearch import AsyncElasticsearch
-from elasticsearch._version import __versionstr__ as lib_version
+from .... import AsyncElasticsearch
+from ...._version import __versionstr__ as lib_version
 
 
 class AsyncEmbeddingService(ABC):
@@ -18,9 +18,9 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
-from elasticsearch import AsyncElasticsearch
-from elasticsearch.helpers.vectorstore._async._utils import model_must_be_deployed
-from elasticsearch.helpers.vectorstore._utils import DistanceMetric
+from .... import AsyncElasticsearch
+from ....helpers.vectorstore._async._utils import model_must_be_deployed
+from ....helpers.vectorstore._utils import DistanceMetric
 
 
 class AsyncRetrievalStrategy(ABC):