nui-python-shared-utils 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,460 @@
1
+ """
2
+ Refactored Elasticsearch client using BaseClient for DRY code patterns.
3
+ """
4
+
5
import logging
import os
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, Iterator, List, Optional, Tuple
from urllib.parse import urlparse

from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk as es_streaming_bulk

from .base_client import BaseClient, ServiceHealthMixin
from .utils import handle_client_errors, resolve_config_value

log = logging.getLogger(__name__)
17
+
18
+
19
class ElasticsearchClient(BaseClient, ServiceHealthMixin):
    """
    Refactored Elasticsearch client with standardized patterns.

    Connection details and credentials are resolved through ``BaseClient``
    (direct ``credentials`` dict, then environment variables, then the
    configured secret). All query helpers route through
    ``_execute_with_error_handling`` and are additionally guarded by the
    ``handle_client_errors`` decorator, which supplies a safe default
    return value on failure.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        secret_name: Optional[str] = None,
        credentials: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize Elasticsearch client.

        Args:
            host: Override ES host
            secret_name: Override secret name
            credentials: Direct credentials dict (keys: username, password), bypasses Secrets Manager
            **kwargs: Additional ES client configuration
        """
        # Store host for later use in service client creation; BaseClient
        # drives the rest of the setup (credential resolution, config).
        self._host_override = host
        super().__init__(secret_name=secret_name, credentials=credentials, **kwargs)

    def _get_default_config_prefix(self) -> str:
        """Return configuration prefix for Elasticsearch."""
        return "es"

    def _get_default_secret_name(self) -> str:
        """Return default secret name for ES credentials."""
        return "elasticsearch-credentials"

    def _resolve_credentials_from_env(self) -> Optional[Dict[str, Any]]:
        """Resolve Elasticsearch credentials from environment variables.

        Checks for ES_PASSWORD (required to trigger).
        ES_USERNAME defaults to "elastic" if not set.

        Returns:
            Credentials dict with ``username``/``password`` keys, or ``None``
            when ES_PASSWORD is unset (caller falls back to other sources).
        """
        password = os.environ.get("ES_PASSWORD")
        if not password:
            return None
        return {
            "username": os.environ.get("ES_USERNAME", "elastic"),
            "password": password,
        }

    def _create_service_client(self) -> Elasticsearch:
        """Create Elasticsearch client with resolved configuration.

        Raises:
            ValueError: if no password is present in the resolved credentials.
        """
        # Resolve host: explicit override > env vars > config > localhost.
        host = resolve_config_value(
            self._host_override,
            ["ES_HOST", "ELASTICSEARCH_HOST"],
            getattr(self.config, "es_host", "localhost:9200"),
        )

        parsed = urlparse(host)
        if parsed.scheme and parsed.netloc:
            # Already a full URL (scheme://netloc) -- use it verbatim.
            es_url = host
        else:
            # Bare host[:port]: default the port and prepend the configured
            # scheme (http unless overridden in client_config).
            if ":" not in host:
                host = f"{host}:9200"
            scheme = self.client_config.get("scheme", "http")
            es_url = f"{scheme}://{host}"

        username = self.credentials.get("username", "elastic")
        password = self.credentials.get("password")

        if not password:
            raise ValueError("Elasticsearch credentials must include 'password'")

        return Elasticsearch(
            [es_url],
            basic_auth=(username, password),
            request_timeout=self.client_config.get("request_timeout", 30),
            max_retries=self.client_config.get("max_retries", 3),
            retry_on_timeout=self.client_config.get("retry_on_timeout", True),
        )

    @handle_client_errors(default_return=[])
    def search(self, index: str, body: Dict, size: int = 100) -> List[Dict]:
        """
        Execute search query with error handling.

        Args:
            index: Index pattern to search
            body: Elasticsearch query body
            size: Maximum results to return

        Returns:
            List of hit documents (the ``_source`` of each hit); empty list
            on failure.
        """
        def _search_operation():
            response = self._service_client.search(
                index=index,
                body=body,
                size=size,
                ignore_unavailable=True,
            )
            return [hit["_source"] for hit in response["hits"]["hits"]]

        return self._execute_with_error_handling(
            "search",
            _search_operation,
            index=index,
            size=size,
        )

    @handle_client_errors(default_return={})
    def aggregate(self, index: str, body: Dict) -> Dict[str, Any]:
        """
        Execute aggregation query with error handling.

        Args:
            index: Index pattern to search
            body: Elasticsearch query body with aggregations

        Returns:
            Aggregation results (empty dict on failure)
        """
        def _aggregate_operation():
            response = self._service_client.search(
                index=index,
                body=body,
                size=0,  # Only need aggregations, not hits
                ignore_unavailable=True,
            )
            return response.get("aggregations", {})

        return self._execute_with_error_handling(
            "aggregate",
            _aggregate_operation,
            index=index,
        )

    @handle_client_errors(default_return=0)
    def count(self, index: str, body: Optional[Dict[str, Any]] = None) -> int:
        """
        Count documents with error handling.

        Args:
            index: Index pattern to search
            body: Optional query body

        Returns:
            Document count (0 on failure)
        """
        def _count_operation():
            response = self._service_client.count(
                index=index,
                body=body,
                ignore_unavailable=True,
            )
            return response.get("count", 0)

        return self._execute_with_error_handling(
            "count",
            _count_operation,
            index=index,
        )

    @handle_client_errors(default_return={})
    def get_service_stats(
        self,
        service: str,
        hours: int = 24,
        index_prefix: str = "logs",
    ) -> Dict[str, Any]:
        """
        Get comprehensive service statistics.

        Args:
            service: Service name
            hours: Time window to analyze
            index_prefix: Index prefix pattern

        Returns:
            Dictionary with request/error counts, error rate, and
            p50/p95/p99 response-time percentiles
        """
        def _stats_operation():
            # Timezone-aware UTC window (datetime.utcnow() is deprecated and
            # returns a naive datetime).
            now = datetime.now(timezone.utc)
            start_time = now - timedelta(hours=hours)
            index = f"{index_prefix}-{service}-*"

            # Single query: distinct request count, distinct error count
            # (response_code >= 400), and response-time percentiles.
            body = {
                "query": {
                    "bool": {
                        "filter": [{
                            "range": {
                                "@timestamp": {
                                    "gte": start_time.isoformat(),
                                    "lte": now.isoformat(),
                                }
                            }
                        }]
                    }
                },
                "aggs": {
                    "total": {
                        "cardinality": {"field": "request_id.keyword"}
                    },
                    "errors": {
                        "filter": {"range": {"response_code": {"gte": 400}}},
                        "aggs": {
                            "count": {
                                "cardinality": {"field": "request_id.keyword"}
                            }
                        },
                    },
                    "response_times": {
                        "percentiles": {
                            "field": "response_time",
                            "percents": [50, 95, 99],
                        }
                    },
                },
            }

            aggs = self.aggregate(index, body)

            total = aggs.get("total", {}).get("value", 0)
            errors = aggs.get("errors", {}).get("count", {}).get("value", 0)
            percentiles = aggs.get("response_times", {}).get("values", {})

            return {
                "service": service,
                "time_window_hours": hours,
                "total_count": total,
                "error_count": errors,
                # Guard against division by zero when no requests were seen.
                "error_rate": (errors / total * 100) if total > 0 else 0,
                "p50_response_time": percentiles.get("50.0", 0),
                "p95_response_time": percentiles.get("95.0", 0),
                "p99_response_time": percentiles.get("99.0", 0),
            }

        return self._execute_with_error_handling(
            "get_service_stats",
            _stats_operation,
            service=service,
            hours=hours,
        )

    @handle_client_errors(default_return=[])
    def get_recent_errors(
        self,
        service: str,
        hours: int = 1,
        limit: int = 10,
        index_prefix: str = "logs",
    ) -> List[Dict]:
        """
        Get recent error logs for a service.

        Args:
            service: Service name
            hours: Time window
            limit: Maximum number of errors
            index_prefix: Index prefix

        Returns:
            List of recent error documents, newest first
        """
        def _errors_operation():
            # Timezone-aware UTC window (datetime.utcnow() is deprecated).
            now = datetime.now(timezone.utc)
            start_time = now - timedelta(hours=hours)
            index = f"{index_prefix}-{service}-*"

            body = {
                "query": {
                    "bool": {
                        "filter": [
                            {
                                "range": {
                                    "@timestamp": {
                                        "gte": start_time.isoformat(),
                                        "lte": now.isoformat(),
                                    }
                                }
                            },
                            {
                                "range": {"response_code": {"gte": 400}}
                            },
                        ]
                    }
                },
                "sort": [{"@timestamp": {"order": "desc"}}],
            }

            return self.search(index, body, size=limit)

        return self._execute_with_error_handling(
            "get_recent_errors",
            _errors_operation,
            service=service,
            hours=hours,
            limit=limit,
        )

    def _perform_health_check(self):
        """Perform Elasticsearch health check.

        Raises:
            Exception: if the cluster cannot be reached, the info response
                is invalid, or the cluster status is red.
        """
        # Only wrap transport/connection failures; our own diagnostic
        # exceptions below must not be re-wrapped a second time.
        try:
            info = self._service_client.info()
            health = self._service_client.cluster.health()
        except Exception as e:
            raise Exception(f"Elasticsearch health check failed: {e}") from e

        if not info.get("version"):
            raise Exception("Elasticsearch info response invalid")

        if health.get("status") == "red":
            raise Exception(f"Elasticsearch cluster is red: {health}")

    def get_cluster_info(self) -> Dict:
        """
        Get Elasticsearch cluster information.

        Returns:
            Cluster info dictionary, or ``{"error": ...}`` on failure
        """
        try:
            info = self._service_client.info()
            health = self._service_client.cluster.health()

            return {
                "version": info.get("version", {}).get("number"),
                "cluster_name": info.get("cluster_name"),
                "cluster_status": health.get("status"),
                "number_of_nodes": health.get("number_of_nodes"),
                "number_of_data_nodes": health.get("number_of_data_nodes"),
                "active_primary_shards": health.get("active_primary_shards"),
                "active_shards": health.get("active_shards"),
            }
        except Exception as e:
            log.error(f"Failed to get cluster info: {e}")
            return {"error": str(e)}

    @handle_client_errors(default_return=[])
    def get_indices_info(self, pattern: str = "*") -> List[Dict]:
        """
        Get information about indices.

        Args:
            pattern: Index pattern to match

        Returns:
            List of index information dictionaries
        """
        def _indices_operation():
            response = self._service_client.cat.indices(
                index=pattern,
                format="json",
                h="index,health,status,docs.count,store.size",
            )
            return response or []

        return self._execute_with_error_handling(
            "get_indices_info",
            _indices_operation,
            pattern=pattern,
        )

    def streaming_bulk(
        self,
        actions: Iterator[Dict],
        chunk_size: int = 100,
        max_retries: int = 2,
        raise_on_error: bool = False,
        **kwargs,
    ) -> Tuple[int, int]:
        """
        Stream documents to Elasticsearch with error handling.

        Wrapper around elasticsearch.helpers.streaming_bulk() that provides
        automatic error logging and returns success/failure counts.

        Args:
            actions: Iterator of action dictionaries. Each dict should have
                '_index' and '_source' keys (and optionally '_id', '_op_type').
            chunk_size: Number of documents to send per batch (default: 100)
            max_retries: Number of retries for failed documents (default: 2)
            raise_on_error: If True, raise exception on first error (default: False)
            **kwargs: Additional arguments passed to streaming_bulk()

        Returns:
            Tuple of (success_count, failure_count)

        Example:
            def generate_docs():
                for item in items:
                    yield {
                        "_index": "my-index",
                        "_source": {"field": item.value}
                    }

            success, failed = client.streaming_bulk(generate_docs())
            print(f"Indexed {success} documents, {failed} failures")
        """
        success_count = 0
        failure_count = 0
        context = {"client_type": self.__class__.__name__, "operation": "streaming_bulk"}

        try:
            for ok, response in es_streaming_bulk(
                client=self._service_client,
                actions=actions,
                chunk_size=chunk_size,
                max_retries=max_retries,
                raise_on_error=raise_on_error,
                **kwargs,
            ):
                if ok:
                    success_count += 1
                else:
                    failure_count += 1
                    # Per-item failure payload is keyed by the op type
                    # (e.g. {"index": {...}}); log its details.
                    action_type = next(iter(response), "unknown")
                    error_info = response.get(action_type, {})
                    log.error(
                        f"Bulk indexing failure: {action_type}",
                        extra={
                            **context,
                            "index": error_info.get("_index", "unknown"),
                            "error": error_info.get("error", "unknown"),
                        },
                    )
        except Exception as e:
            # A failure here aborts the stream; counts reflect what completed.
            log.error(
                f"streaming_bulk failed: {e}",
                extra={**context, "error_type": type(e).__name__, "error_message": str(e)},
            )
            if raise_on_error:
                raise

        log.debug(
            f"streaming_bulk completed: {success_count} successful, {failure_count} failed",
            extra={**context, "success_count": success_count, "failure_count": failure_count},
        )
        return success_count, failure_count