apache-airflow-providers-elasticsearch 5.4.0rc1__py3-none-any.whl → 5.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-elasticsearch might be problematic; see the registry's release advisory for more details.

@@ -25,18 +25,15 @@ from __future__ import annotations
25
25
 
26
26
  import packaging.version
27
27
 
28
- __all__ = ["__version__"]
28
+ from airflow import __version__ as airflow_version
29
29
 
30
- __version__ = "5.4.0"
30
+ __all__ = ["__version__"]
31
31
 
32
- try:
33
- from airflow import __version__ as airflow_version
34
- except ImportError:
35
- from airflow.version import version as airflow_version
32
+ __version__ = "5.4.1"
36
33
 
37
34
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
38
- "2.6.0"
35
+ "2.7.0"
39
36
  ):
40
37
  raise RuntimeError(
41
- f"The package `apache-airflow-providers-elasticsearch:{__version__}` needs Apache Airflow 2.6.0+"
38
+ f"The package `apache-airflow-providers-elasticsearch:{__version__}` needs Apache Airflow 2.7.0+"
42
39
  )
@@ -28,9 +28,13 @@ def get_provider_info():
28
28
  "name": "Elasticsearch",
29
29
  "description": "`Elasticsearch <https://www.elastic.co/elasticsearch>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1705912070,
31
+ "source-date-epoch": 1716287658,
32
32
  "versions": [
33
+ "5.4.1",
33
34
  "5.4.0",
35
+ "5.3.4",
36
+ "5.3.3",
37
+ "5.3.2",
34
38
  "5.3.1",
35
39
  "5.3.0",
36
40
  "5.2.0",
@@ -66,7 +70,7 @@ def get_provider_info():
66
70
  "1.0.0",
67
71
  ],
68
72
  "dependencies": [
69
- "apache-airflow>=2.6.0",
73
+ "apache-airflow>=2.7.0",
70
74
  "apache-airflow-providers-common-sql>=1.3.1",
71
75
  "elasticsearch>=8.10,<9",
72
76
  ],
@@ -160,12 +164,19 @@ def get_provider_info():
160
164
  "default": "offset",
161
165
  },
162
166
  "index_patterns": {
163
- "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\n",
167
+ "description": "Comma separated list of index patterns to use when searching for logs (default: `_all`).\nThe index_patterns_callable takes precedence over this.\n",
164
168
  "version_added": "2.6.0",
165
169
  "type": "string",
166
170
  "example": "something-*",
167
171
  "default": "_all",
168
172
  },
173
+ "index_patterns_callable": {
174
+ "description": "A string representing the full path to the Python callable path which accept TI object and\nreturn comma separated list of index patterns. This will takes precedence over index_patterns.\n",
175
+ "version_added": "5.5.0",
176
+ "type": "string",
177
+ "example": "module.callable",
178
+ "default": "",
179
+ },
169
180
  },
170
181
  },
171
182
  "elasticsearch_configs": {
@@ -17,11 +17,11 @@
17
17
  # under the License.
18
18
  from __future__ import annotations
19
19
 
20
- import warnings
21
20
  from functools import cached_property
22
21
  from typing import TYPE_CHECKING, Any
23
22
  from urllib import parse
24
23
 
24
+ from deprecated import deprecated
25
25
  from elasticsearch import Elasticsearch
26
26
 
27
27
  from airflow.exceptions import AirflowProviderDeprecationWarning
@@ -138,6 +138,10 @@ class ElasticsearchSQLHook(DbApiHook):
138
138
  return uri
139
139
 
140
140
 
141
+ @deprecated(
142
+ reason="Please use `airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchSQLHook`.",
143
+ category=AirflowProviderDeprecationWarning,
144
+ )
141
145
  class ElasticsearchHook(ElasticsearchSQLHook):
142
146
  """
143
147
  This class is deprecated and was renamed to ElasticsearchSQLHook.
@@ -146,12 +150,6 @@ class ElasticsearchHook(ElasticsearchSQLHook):
146
150
  """
147
151
 
148
152
  def __init__(self, *args, **kwargs):
149
- warnings.warn(
150
- """This class is deprecated.
151
- Please use `airflow.providers.elasticsearch.hooks.elasticsearch.ElasticsearchSQLHook`.""",
152
- AirflowProviderDeprecationWarning,
153
- stacklevel=3,
154
- )
155
153
  super().__init__(*args, **kwargs)
156
154
 
157
155
 
@@ -164,8 +162,8 @@ class ElasticsearchPythonHook(BaseHook):
164
162
  Example: {"ca_cert":"/path/to/cert", "basic_auth": "(user, pass)"}
165
163
  """
166
164
 
167
- def __init__(self, hosts: list[Any], es_conn_args: dict | None = None, **kwargs):
168
- super().__init__(**kwargs)
165
+ def __init__(self, hosts: list[Any], es_conn_args: dict | None = None):
166
+ super().__init__()
169
167
  self.hosts = hosts
170
168
  self.es_conn_args = es_conn_args or {}
171
169
 
@@ -16,6 +16,8 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
+ from datetime import datetime
20
+
19
21
  import pendulum
20
22
 
21
23
  from airflow.utils.log.json_formatter import JSONFormatter
@@ -30,7 +32,9 @@ class ElasticsearchJSONFormatter(JSONFormatter):
30
32
 
31
33
  def formatTime(self, record, datefmt=None):
32
34
  """Return the creation time of the LogRecord in ISO 8601 date/time format in the local time zone."""
33
- dt = pendulum.from_timestamp(record.created, tz=pendulum.local_timezone())
35
+ # TODO: Use airflow.utils.timezone.from_timestamp(record.created, tz="local")
36
+ # as soon as min Airflow 2.9.0
37
+ dt = datetime.fromtimestamp(record.created, tz=pendulum.local_timezone())
34
38
  s = dt.strftime(datefmt or self.default_time_format)
35
39
  if self.default_msec_format:
36
40
  s = self.default_msec_format % (s, record.msecs)
@@ -34,15 +34,18 @@ class AttributeList:
34
34
  self._l_ = _list
35
35
 
36
36
  def __getitem__(self, k):
37
+ """Retrieve an item or a slice from the list. If the item is a dictionary, it is wrapped in an AttributeDict."""
37
38
  val = self._l_[k]
38
39
  if isinstance(val, slice):
39
40
  return AttributeList(val)
40
41
  return _wrap(val)
41
42
 
42
43
  def __iter__(self):
44
+ """Provide an iterator for the list or the dictionary."""
43
45
  return (_wrap(i) for i in self._l_)
44
46
 
45
47
  def __bool__(self):
48
+ """Check if the list is non-empty."""
46
49
  return bool(self._l_)
47
50
 
48
51
 
@@ -53,12 +56,14 @@ class AttributeDict:
53
56
  super().__setattr__("_d_", d)
54
57
 
55
58
  def __getattr__(self, attr_name):
59
+ """Retrieve an item as an attribute from the dictionary."""
56
60
  try:
57
61
  return self.__getitem__(attr_name)
58
62
  except KeyError:
59
63
  raise AttributeError(f"{self.__class__.__name__!r} object has no attribute {attr_name!r}")
60
64
 
61
65
  def __getitem__(self, key):
66
+ """Retrieve an item using a key from the dictionary."""
62
67
  return _wrap(self._d_[key])
63
68
 
64
69
  def to_dict(self):
@@ -120,14 +125,17 @@ class ElasticSearchResponse(AttributeDict):
120
125
  super().__init__(response)
121
126
 
122
127
  def __iter__(self) -> Iterator[Hit]:
128
+ """Provide an iterator over the hits in the Elasticsearch response."""
123
129
  return iter(self.hits)
124
130
 
125
131
  def __getitem__(self, key):
132
+ """Retrieve a specific hit or a slice of hits from the Elasticsearch response."""
126
133
  if isinstance(key, (slice, int)):
127
134
  return self.hits[key]
128
135
  return super().__getitem__(key)
129
136
 
130
137
  def __bool__(self):
138
+ """Evaluate the presence of hits in the Elasticsearch response."""
131
139
  return bool(self.hits)
132
140
 
133
141
  @property
@@ -41,6 +41,7 @@ from airflow.providers.elasticsearch.log.es_response import ElasticSearchRespons
41
41
  from airflow.utils import timezone
42
42
  from airflow.utils.log.file_task_handler import FileTaskHandler
43
43
  from airflow.utils.log.logging_mixin import ExternalLoggingMixin, LoggingMixin
44
+ from airflow.utils.module_loading import import_string
44
45
  from airflow.utils.session import create_session
45
46
 
46
47
  if TYPE_CHECKING:
@@ -108,7 +109,7 @@ def _ensure_ti(ti: TaskInstanceKey | TaskInstance, session) -> TaskInstance:
108
109
  .one_or_none()
109
110
  )
110
111
  if isinstance(val, TaskInstance):
111
- val._try_number = ti.try_number
112
+ val.try_number = ti.try_number
112
113
  return val
113
114
  else:
114
115
  raise AirflowException(f"Could not find TaskInstance for {ti}")
@@ -152,7 +153,8 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
152
153
  offset_field: str = "offset",
153
154
  host: str = "http://localhost:9200",
154
155
  frontend: str = "localhost:5601",
155
- index_patterns: str | None = conf.get("elasticsearch", "index_patterns", fallback="_all"),
156
+ index_patterns: str = conf.get("elasticsearch", "index_patterns"),
157
+ index_patterns_callable: str = conf.get("elasticsearch", "index_patterns_callable", fallback=""),
156
158
  es_kwargs: dict | None | Literal["default_es_kwargs"] = "default_es_kwargs",
157
159
  *,
158
160
  filename_template: str | None = None,
@@ -184,6 +186,7 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
184
186
  self.host_field = host_field
185
187
  self.offset_field = offset_field
186
188
  self.index_patterns = index_patterns
189
+ self.index_patterns_callable = index_patterns_callable
187
190
  self.context_set = False
188
191
 
189
192
  self.formatter: logging.Formatter
@@ -213,6 +216,19 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
213
216
 
214
217
  return host
215
218
 
219
+ def _get_index_patterns(self, ti: TaskInstance | None) -> str:
220
+ """
221
+ Get index patterns by calling index_patterns_callable, if provided, or the configured index_patterns.
222
+
223
+ :param ti: A TaskInstance object or None.
224
+ """
225
+ if self.index_patterns_callable:
226
+ self.log.debug("Using index_patterns_callable: %s", self.index_patterns_callable)
227
+ index_pattern_callable_obj = import_string(self.index_patterns_callable)
228
+ return index_pattern_callable_obj(ti)
229
+ self.log.debug("Using index_patterns: %s", self.index_patterns)
230
+ return self.index_patterns
231
+
216
232
  def _render_log_id(self, ti: TaskInstance | TaskInstanceKey, try_number: int) -> str:
217
233
  from airflow.models.taskinstance import TaskInstanceKey
218
234
 
@@ -225,6 +241,8 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
225
241
  else:
226
242
  log_id_template = self.log_id_template
227
243
 
244
+ if TYPE_CHECKING:
245
+ assert ti.task
228
246
  try:
229
247
  dag = ti.task.dag
230
248
  except AttributeError: # ti.task is not always set.
@@ -300,7 +318,7 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
300
318
 
301
319
  offset = metadata["offset"]
302
320
  log_id = self._render_log_id(ti, try_number)
303
- response = self._es_read(log_id, offset)
321
+ response = self._es_read(log_id, offset, ti)
304
322
  if response is not None and response.hits:
305
323
  logs_by_host = self._group_logs_by_host(response)
306
324
  next_offset = attrgetter(self.offset_field)(response[-1])
@@ -370,12 +388,13 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
370
388
  # Just a safe-guard to preserve backwards-compatibility
371
389
  return hit.message
372
390
 
373
- def _es_read(self, log_id: str, offset: int | str) -> ElasticSearchResponse | None:
391
+ def _es_read(self, log_id: str, offset: int | str, ti: TaskInstance) -> ElasticSearchResponse | None:
374
392
  """
375
393
  Return the logs matching log_id in Elasticsearch and next offset or ''.
376
394
 
377
395
  :param log_id: the log_id of the log to read.
378
396
  :param offset: the offset start to read log from.
397
+ :param ti: the task instance object
379
398
 
380
399
  :meta private:
381
400
  """
@@ -386,16 +405,17 @@ class ElasticsearchTaskHandler(FileTaskHandler, ExternalLoggingMixin, LoggingMix
386
405
  }
387
406
  }
388
407
 
408
+ index_patterns = self._get_index_patterns(ti)
389
409
  try:
390
- max_log_line = self.client.count(index=self.index_patterns, query=query)["count"] # type: ignore
410
+ max_log_line = self.client.count(index=index_patterns, query=query)["count"] # type: ignore
391
411
  except NotFoundError as e:
392
- self.log.exception("The target index pattern %s does not exist", self.index_patterns)
412
+ self.log.exception("The target index pattern %s does not exist", index_patterns)
393
413
  raise e
394
414
 
395
415
  if max_log_line != 0:
396
416
  try:
397
417
  res = self.client.search(
398
- index=self.index_patterns,
418
+ index=index_patterns,
399
419
  query=query,
400
420
  sort=[self.offset_field],
401
421
  size=self.MAX_LINE_PER_PAGE,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-elasticsearch
3
- Version: 5.4.0rc1
3
+ Version: 5.4.1
4
4
  Summary: Provider package apache-airflow-providers-elasticsearch for Apache Airflow
5
5
  Keywords: airflow-provider,elasticsearch,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -19,14 +19,15 @@ Classifier: Programming Language :: Python :: 3.8
19
19
  Classifier: Programming Language :: Python :: 3.9
20
20
  Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
22
23
  Classifier: Topic :: System :: Monitoring
23
- Requires-Dist: apache-airflow-providers-common-sql>=1.3.1.dev0
24
- Requires-Dist: apache-airflow>=2.6.0.dev0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.3.1
25
+ Requires-Dist: apache-airflow>=2.7.0
25
26
  Requires-Dist: elasticsearch>=8.10,<9
26
27
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
27
28
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
28
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.0/changelog.html
29
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.0
29
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.1/changelog.html
30
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.1
30
31
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
31
32
  Project-URL: Source Code, https://github.com/apache/airflow
32
33
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -77,7 +78,7 @@ Provides-Extra: common.sql
77
78
 
78
79
  Package ``apache-airflow-providers-elasticsearch``
79
80
 
80
- Release: ``5.4.0.rc1``
81
+ Release: ``5.4.1``
81
82
 
82
83
 
83
84
  `Elasticsearch <https://www.elastic.co/elasticsearch>`__
@@ -90,7 +91,7 @@ This is a provider package for ``elasticsearch`` provider. All classes for this
90
91
  are in ``airflow.providers.elasticsearch`` python package.
91
92
 
92
93
  You can find package information and changelog for the provider
93
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.0/>`_.
94
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.1/>`_.
94
95
 
95
96
  Installation
96
97
  ------------
@@ -99,7 +100,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
99
100
  for the minimum Airflow version supported) via
100
101
  ``pip install apache-airflow-providers-elasticsearch``
101
102
 
102
- The package supports the following python versions: 3.8,3.9,3.10,3.11
103
+ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
103
104
 
104
105
  Requirements
105
106
  ------------
@@ -107,7 +108,7 @@ Requirements
107
108
  ======================================= ==================
108
109
  PIP package Version required
109
110
  ======================================= ==================
110
- ``apache-airflow`` ``>=2.6.0``
111
+ ``apache-airflow`` ``>=2.7.0``
111
112
  ``apache-airflow-providers-common-sql`` ``>=1.3.1``
112
113
  ``elasticsearch`` ``>=8.10,<9``
113
114
  ======================================= ==================
@@ -132,4 +133,4 @@ Dependent package
132
133
  ============================================================================================================ ==============
133
134
 
134
135
  The changelog for the provider package can be found in the
135
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.0/changelog.html>`_.
136
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-elasticsearch/5.4.1/changelog.html>`_.
@@ -0,0 +1,13 @@
1
+ airflow/providers/elasticsearch/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
2
+ airflow/providers/elasticsearch/__init__.py,sha256=leQgUnvVqq9KHi7Uffo5O3HuM6OldRNLpqcwJxlBFjE,1500
3
+ airflow/providers/elasticsearch/get_provider_info.py,sha256=b7kZ8E03LE7dotRl0fc2ZbPpfpf8EkJ7unuUmpxNwSQ,8507
4
+ airflow/providers/elasticsearch/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
5
+ airflow/providers/elasticsearch/hooks/elasticsearch.py,sha256=nraT_vR1vm0VZ13G4lIJ9YEI0igsm-A66BjFlf-yNmE,6362
6
+ airflow/providers/elasticsearch/log/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
7
+ airflow/providers/elasticsearch/log/es_json_formatter.py,sha256=DwWPDJtZLr_6Mdae1-XVEgmE1XErFIanSzxWovs50ig,1796
8
+ airflow/providers/elasticsearch/log/es_response.py,sha256=tEFz1pAUzIg6Zw43ZjiHFTOB72kNP-czbEwMbymUZ88,6037
9
+ airflow/providers/elasticsearch/log/es_task_handler.py,sha256=vgEQWlZwCp_K0jTv7KKwv6jPgXLXRCeSQ5833Wto7ts,26244
10
+ apache_airflow_providers_elasticsearch-5.4.1.dist-info/entry_points.txt,sha256=jpgAUVmTsdtWQ4nru2FJQKP9JBN4OPHK-ybfYc3_BOs,109
11
+ apache_airflow_providers_elasticsearch-5.4.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
12
+ apache_airflow_providers_elasticsearch-5.4.1.dist-info/METADATA,sha256=73QEnaM9VMtHOLQ1edfLeNQ0h5hxu2HNPxi_i9LJjiY,6217
13
+ apache_airflow_providers_elasticsearch-5.4.1.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- airflow/providers/elasticsearch/LICENSE,sha256=ywUBpKZc7Jb96rVt5I3IDbg7dIJAbUSHkuoDcF3jbH4,13569
2
- airflow/providers/elasticsearch/__init__.py,sha256=OyxIHdA-PcxE5vOaA2ZbScRgTrXifx4xfo9nQmWyBEo,1588
3
- airflow/providers/elasticsearch/get_provider_info.py,sha256=HCp7W6EQYrczJLrDOsl5Bg22st23Vw7Q20daeKyxc6o,7878
4
- airflow/providers/elasticsearch/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
5
- airflow/providers/elasticsearch/hooks/elasticsearch.py,sha256=V2tMQKKGZgWwTnrc7n_-IW6XrFIY6oAygqgcSpXTUDk,6448
6
- airflow/providers/elasticsearch/log/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
7
- airflow/providers/elasticsearch/log/es_json_formatter.py,sha256=IBCC6U8XUIfZ1vZKeDAQWCwNCXhP5yG2oOjSJesuGV0,1640
8
- airflow/providers/elasticsearch/log/es_response.py,sha256=TSZSkf63q-uM6dju4FJXx-GVcmsbxSdx9tMdcBTXmaA,5428
9
- airflow/providers/elasticsearch/log/es_task_handler.py,sha256=zXMZM13Yvc80pZNmftxL6YXcuIrCs-2wQ9-KWCUYut0,25257
10
- apache_airflow_providers_elasticsearch-5.4.0rc1.dist-info/entry_points.txt,sha256=jpgAUVmTsdtWQ4nru2FJQKP9JBN4OPHK-ybfYc3_BOs,109
11
- apache_airflow_providers_elasticsearch-5.4.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
12
- apache_airflow_providers_elasticsearch-5.4.0rc1.dist-info/METADATA,sha256=aeEiQsqw8chv_CwivuLryg0Q00CxOQOsjPKKWnexnsE,6178
13
- apache_airflow_providers_elasticsearch-5.4.0rc1.dist-info/RECORD,,