fustor-source-elasticsearch 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ """
2
+ Fustor Agent source driver for Elasticsearch.
3
+ """
4
+ import logging
5
+ import threading
6
+ from typing import Any, Dict, Iterator, Tuple
7
+ from datetime import datetime
8
+
9
+ from elasticsearch import Elasticsearch, AsyncElasticsearch, AuthenticationException, AuthorizationException
10
+
11
+ from fustor_core.drivers import SourceDriver
12
+ from fustor_core.models.config import SourceConfig, PasswdCredential, ApiKeyCredential
13
+ from fustor_core.exceptions import DriverError
14
+ from fustor_event_model.models import EventBase, InsertEvent
15
+
16
+ logger = logging.getLogger("fustor_agent.driver.elasticsearch")  # module-wide logger shared by the driver class and its helpers
17
+
18
class ElasticsearchDriver(SourceDriver):
    """Source driver that reads documents from a single Elasticsearch index.

    The index and its timestamp field come from ``config.driver_params``
    (``index_name`` and ``timestamp_field``). A full snapshot is paged through
    a point-in-time (PIT) search; incremental changes are discovered by
    polling a range query on the timestamp field.

    Instances are shared: constructing the driver twice with the same URI and
    credential returns the same object.
    """

    # Shared instances keyed by "<uri>#<hash of str(credential)>".
    _instances: Dict[str, 'ElasticsearchDriver'] = {}
    # Serialises lookup/creation in ``_instances``.
    _lock = threading.Lock()

    def __new__(cls, id: str, config: SourceConfig):
        """Return the shared instance for this URI/credential pair, creating it if needed."""
        # Generate unique signature: URI + credential to ensure permission isolation
        signature = f"{config.uri}#{hash(str(config.credential))}"

        with ElasticsearchDriver._lock:
            if signature not in ElasticsearchDriver._instances:
                instance = super().__new__(cls)
                ElasticsearchDriver._instances[signature] = instance
            return ElasticsearchDriver._instances[signature]

    def __init__(self, id: str, config: SourceConfig):
        """Initialise the driver once; later constructions of a shared instance are no-ops.

        NOTE(review): because the instance is keyed only by URI + credential,
        a second source with the same connection but a different ``id`` or
        ``driver_params`` (e.g. another index) keeps the first one's settings.
        Confirm this is intended.
        """
        # Prevent re-initialization of shared instances
        if hasattr(self, '_initialized'):
            return

        super().__init__(id, config)
        self.uri = self.config.uri
        self.credential = self.config.credential
        self.driver_params = self.config.driver_params

        self._initialized = True

    @staticmethod
    def _build_auth_kwargs(credential: Any) -> Dict[str, Any]:
        """Translate a credential object into ``basic_auth`` / ``api_key`` client kwargs.

        Both keys are always present; the one that does not apply is ``None``.
        A credential of an unrecognised type, or one with an empty user/key,
        yields no authentication at all.
        """
        basic_auth = None
        api_key = None
        if isinstance(credential, PasswdCredential) and credential.user:
            basic_auth = (credential.user, credential.passwd or '')
        elif isinstance(credential, ApiKeyCredential) and credential.key:
            api_key = credential.key
        return {"basic_auth": basic_auth, "api_key": api_key}

    @staticmethod
    def _parse_timestamp(value: Any) -> "datetime | None":
        """Convert a document timestamp into a timezone-aware ``datetime``.

        Returns ``None`` for falsy values. Numbers are read as epoch
        milliseconds (assumes the field uses an ``epoch_millis``-compatible
        format -- TODO confirm against the index mapping). Strings are parsed
        as ISO-8601; a trailing ``Z`` is accepted. Values without an offset
        are treated as UTC.

        Raises:
            ValueError: if a string value is not valid ISO-8601.
        """
        from datetime import timezone  # local import: the file only imports ``datetime``

        if not value:
            return None
        if isinstance(value, (int, float)):
            return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
        parsed = datetime.fromisoformat(str(value).replace('Z', '+00:00'))
        if parsed.tzinfo is None:
            # Pin naive values to UTC so the result does not depend on the
            # host's local zone.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def _get_es_client(self) -> Elasticsearch:
        """Create a synchronous client for this driver's URI and credential."""
        return self._get_sync_es_client(self.uri, self.credential)

    @staticmethod
    def _get_sync_es_client(uri: str, credential: Any) -> Elasticsearch:
        """Create a synchronous ``Elasticsearch`` client for ``uri``."""
        return Elasticsearch(
            hosts=[uri],
            **ElasticsearchDriver._build_auth_kwargs(credential),
        )

    @staticmethod
    async def _get_async_es_client(uri: str, credential: Any) -> AsyncElasticsearch:
        """Create an ``AsyncElasticsearch`` client for ``uri``.

        Declared ``async`` so existing callers can keep awaiting it; the body
        itself performs no awaits.
        """
        return AsyncElasticsearch(
            hosts=[uri],
            **ElasticsearchDriver._build_auth_kwargs(credential),
        )

    def get_snapshot_iterator(self, **kwargs) -> Iterator[EventBase]:
        """Yield the current content of the index as batches of ``InsertEvent``.

        Pages through a point-in-time search sorted by the timestamp field.
        Every event carries the same ``index`` value: the wall-clock time, in
        epoch milliseconds, at which the snapshot started.

        Keyword Args:
            batch_size: number of documents per search page (default 100).

        Raises:
            DriverError: if ``index_name`` or ``timestamp_field`` is missing
                from the driver parameters.
        """
        index_name = self.driver_params.get("index_name")
        timestamp_field = self.driver_params.get("timestamp_field")
        if not index_name or not timestamp_field:
            raise DriverError("'index_name' and 'timestamp_field' are required driver parameters.")

        batch_size = kwargs.get("batch_size", 100)
        client = self._get_es_client()
        logger.info("Starting snapshot for Elasticsearch index '%s'.", index_name)

        try:
            pit_id = client.open_point_in_time(index=index_name, keep_alive="1m")['id']
            try:
                search_after = None
                snapshot_time = int(datetime.now().timestamp() * 1000)
                while True:
                    # A search that carries ``pit`` must not also name an
                    # index: the target is taken from the point in time.
                    resp = client.search(
                        size=batch_size,
                        sort=[{timestamp_field: "asc"}, {"_doc": "asc"}],
                        pit={"id": pit_id, "keep_alive": "1m"},
                        search_after=search_after,
                    )
                    # The PIT id may change between requests; always continue
                    # with the most recently returned one.
                    pit_id = resp.get("pit_id", pit_id)

                    hits = resp['hits']['hits']
                    if not hits:
                        break

                    rows = [_normalize_doc(h) for h in hits]
                    yield InsertEvent(
                        event_schema=index_name,
                        table=index_name,
                        rows=rows,
                        fields=list(rows[0].keys()),
                        index=snapshot_time,
                    )

                    search_after = hits[-1]['sort']
            finally:
                client.close_point_in_time(id=pit_id)
                logger.info("Snapshot for Elasticsearch index '%s' finished.", index_name)
        finally:
            # The client is created per call, so release its connections here.
            client.close()

    def is_position_available(self, position: int) -> bool:
        """
        Checks if the Elasticsearch position (timestamp) is available for resuming.
        Since Elasticsearch doesn't have the same concept of positions as databases,
        we generally consider positions available but may need to check if they're too old.
        """
        # For now, assume all positions are available as ES allows timestamp-based queries
        # TODO In the future, we might check if the position is older than retention period
        return True

    def get_message_iterator(self, start_position: int=-1, **kwargs) -> Iterator[EventBase]:
        """Return a generator that polls the index for documents newer than a position.

        Args:
            start_position: epoch milliseconds to resume after; ``-1`` means
                "start from now".

        Keyword Args:
            stop_event: event-like object with ``is_set()`` and ``wait()``
                used to end the loop. If omitted, a private event is created
                and the generator runs until it is closed.
            batch_size: maximum documents per poll (default 100).

        Each matching document is yielded as its own ``UpdateEvent`` whose
        ``index`` is the document timestamp in epoch milliseconds.
        Parameter validation happens lazily, on the first ``next()``.

        NOTE(review): the range query uses ``gt`` on the last seen timestamp,
        so documents sharing that timestamp but falling outside the current
        batch are not revisited.
        """

        def _iterator_func() -> Iterator[EventBase]:
            # Local imports keep this block self-contained: neither name is
            # imported at module level. ``UpdateEvent`` is assumed to live
            # beside ``InsertEvent`` -- TODO confirm.
            from datetime import timezone
            from fustor_event_model.models import UpdateEvent

            index_name = self.driver_params.get("index_name")
            timestamp_field = self.driver_params.get("timestamp_field")
            polling_interval = self.driver_params.get("polling_interval_sec", 5)
            if not index_name or not timestamp_field:
                raise DriverError("'index_name' and 'timestamp_field' are required driver parameters.")

            batch_size = kwargs.get("batch_size", 100)
            stop_event = kwargs.get("stop_event")
            if stop_event is None:
                stop_event = threading.Event()

            if start_position != -1:
                start_timestamp = start_position
            else:
                start_timestamp = int(datetime.now().timestamp() * 1000)
            # Build the bound with an explicit UTC offset so it does not
            # depend on the host's local zone.
            last_ts_iso = datetime.fromtimestamp(start_timestamp / 1000, tz=timezone.utc).isoformat()

            client = self._get_es_client()
            try:
                while not stop_event.is_set():
                    resp = client.search(
                        index=index_name,
                        query={"range": {timestamp_field: {"gt": last_ts_iso}}},
                        sort=[{timestamp_field: "asc"}],
                        size=batch_size,
                    )
                    hits = resp['hits']['hits']
                    if not hits:
                        stop_event.wait(timeout=polling_interval)
                        continue

                    progressed = False
                    for hit in hits:
                        if stop_event.is_set():
                            break
                        doc = _normalize_doc(hit)
                        dt = ElasticsearchDriver._parse_timestamp(doc.get(timestamp_field))
                        if dt is None:
                            continue
                        current_ts_ms = int(dt.timestamp() * 1000)
                        yield UpdateEvent(
                            event_schema=index_name,
                            table=index_name,
                            rows=[doc],
                            fields=list(doc.keys()),
                            index=current_ts_ms,
                        )
                        last_ts_iso = dt.isoformat()
                        progressed = True

                    if not progressed and not stop_event.is_set():
                        # No document moved the bound forward; wait rather
                        # than re-issuing the identical query in a tight loop.
                        stop_event.wait(timeout=polling_interval)
            finally:
                client.close()

        return _iterator_func()

    @classmethod
    async def get_available_fields(cls, **kwargs) -> Dict[str, Any]:
        """Return the flattened field mapping of the configured index.

        Keyword Args:
            uri: Elasticsearch URI.
            credential: credential object for the connection.
            driver_params: mapping that must contain ``index_name``.

        Returns:
            ``{"properties": {<dotted.field.name>: {"type": <type>}, ...}}``,
            always including ``_id`` and ``_index`` as ``keyword``. When
            ``index_name`` resolves to several concrete indices (alias or
            pattern), their fields are merged.

        Raises:
            DriverError: if a required argument is missing or the mapping
                cannot be retrieved.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        driver_params = kwargs.get("driver_params", {})
        index_name = driver_params.get("index_name")

        if uri is None or credential_data is None or index_name is None:
            raise DriverError("'uri', 'credential', and 'driver_params.index_name' are required.")

        client = await cls._get_async_es_client(uri, credential_data)
        try:
            mapping = await client.indices.get_mapping(index=index_name)
            # The response is keyed by concrete index name, which differs
            # from ``index_name`` when that is an alias or a pattern.
            if index_name in mapping:
                index_mappings = [mapping[index_name]]
            else:
                index_mappings = list(mapping.values())
            if not index_mappings:
                raise DriverError(f"No mapping returned for index '{index_name}'.")

            flat_properties = {}

            def flatten(props, prefix=''):
                # Walk nested object mappings, producing dotted leaf names.
                for key, value in props.items():
                    if "properties" in value:
                        flatten(value["properties"], f"{prefix}{key}.")
                    else:
                        flat_properties[f"{prefix}{key}"] = {"type": value.get("type", "object")}

            for index_mapping in index_mappings:
                flatten(index_mapping["mappings"].get("properties", {}))
            flat_properties["_id"] = {"type": "keyword"}
            flat_properties["_index"] = {"type": "keyword"}

            return {"properties": flat_properties}
        except DriverError:
            raise
        except Exception as e:
            logger.error("Failed to get available fields for ES index '%s': %s", index_name, e)
            raise DriverError(f"Failed to get mapping for index '{index_name}': {e}") from e
        finally:
            await client.close()

    @classmethod
    async def test_connection(cls, **kwargs) -> Tuple[bool, str]:
        """Ping the cluster and report whether it is reachable.

        Keyword Args:
            uri: Elasticsearch URI.
            credential: credential object for the connection.

        Returns:
            ``(ok, message)``. Errors are reported in the message rather than
            raised.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        if uri is None or credential_data is None:
            return False, "'uri' and 'credential' are required."

        client = None
        try:
            client = await cls._get_async_es_client(uri, credential_data)
            if await client.ping():
                return True, "Successfully connected to Elasticsearch."
            return False, "Connection to Elasticsearch failed."
        except (AuthenticationException, AuthorizationException) as e:
            return False, f"Authentication/Authorization failed: {e}"
        except Exception as e:
            return False, f"An unexpected error occurred: {e}"
        finally:
            if client is not None:
                await client.close()

    @classmethod
    async def check_privileges(cls, **kwargs) -> Tuple[bool, str]:
        """Check that the credential has ``read`` privilege on the index.

        Keyword Args:
            uri: Elasticsearch URI.
            credential: credential object for the connection.
            driver_params: mapping that must contain ``index_name``.

        Returns:
            ``(ok, message)``. If the security API call itself fails, the
            check is treated as passed and a warning is logged.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        driver_params = kwargs.get("driver_params", {})
        index_name = driver_params.get("index_name")

        if uri is None or credential_data is None or index_name is None:
            return False, "'uri', 'credential', and 'driver_params.index_name' are required."

        client = await cls._get_async_es_client(uri, credential_data)
        try:
            response = await client.security.has_privileges(
                body={"index": [{"names": [index_name], "privileges": ["read"]}]}
            )
            if response.get("has_all_requested"):
                return True, "User has sufficient privileges for the index."
            return False, f"User lacks 'read' privilege for index '{index_name}'."
        except Exception as e:
            # Deliberate best-effort: do not block setup when the security
            # API cannot be queried.
            logger.warning(
                "Could not verify privileges via security API (may not be enabled): %s. Assuming success.",
                e,
            )
            return True, "Could not verify privileges via security API; assuming success."
        finally:
            await client.close()

    @classmethod
    async def get_wizard_steps(cls) -> Dict[str, Any]:
        """Return the JSON-schema description of the configuration wizard.

        Two steps are described: connection details (validated with
        ``test_connection``) and index configuration (validated with
        ``check_privileges`` and ``discover_fields_no_cache``). Credential
        schemas are referenced from ``components.schemas``.
        """
        return {
            "steps": [
                {
                    "step_id": "connection",
                    "title": "Connection Details",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "uri": {"type": "string", "title": "Elasticsearch URI"},
                            "credential": {
                                "type": "object",
                                "title": "Credentials",
                                "oneOf": [
                                    {"$ref": "#/components/schemas/PasswdCredential"},
                                    {"$ref": "#/components/schemas/ApiKeyCredential"}
                                ]
                            }
                        },
                        "required": ["uri", "credential"]
                    },
                    "validations": ["test_connection"]
                },
                {
                    "step_id": "index_config",
                    "title": "Index Configuration",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "driver_params": {
                                "type": "object",
                                "title": "Driver Parameters",
                                "properties": {
                                    "index_name": {"type": "string", "title": "Index Name"},
                                    "timestamp_field": {"type": "string", "title": "Timestamp Field"}
                                },
                                "required": ["index_name", "timestamp_field"]
                            }
                        },
                        "required": ["driver_params"]
                    },
                    "validations": ["check_privileges", "discover_fields_no_cache"]
                }
            ],
            "components": {
                "schemas": {
                    "PasswdCredential": {
                        "type": "object",
                        "title": "Username/Password",
                        "properties": {
                            "user": {"type": "string", "title": "Username"},
                            "passwd": {"type": "string", "title": "Password", "format": "password"}
                        },
                        "required": ["user"]
                    },
                    "ApiKeyCredential": {
                        "type": "object",
                        "title": "API Key",
                        "properties": {
                            "key": {"type": "string", "title": "API Key", "format": "password"}
                        },
                        "required": ["key"]
                    }
                }
            }
        }
306
+
307
+ def _normalize_doc(hit: Dict[str, Any]) -> Dict[str, Any]:
308
+ doc = hit.get('_source', {})
309
+ doc['_id'] = hit.get('_id')
310
+ doc['_index'] = hit.get('_index')
311
+ return doc
File without changes
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: fustor-source-elasticsearch
3
+ Version: 0.1.4
4
+ Summary: An Elasticsearch source for Fustor Agent
5
+ Author-email: Huajin Wang <wanghuajin999@163.com>
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: elasticsearch>=9.2.0
8
+ Requires-Dist: fustor-core
@@ -0,0 +1,7 @@
1
+ fustor_source_elasticsearch/__init__.py,sha256=ww7iz8OK1XrzM9XxBOqTx8hd9fa35lDm5MZrLUl7Boc,13836
2
+ fustor_source_elasticsearch/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ fustor_source_elasticsearch-0.1.4.dist-info/METADATA,sha256=xC3-coD6pANIVGITzw52Uv6uOoYwHXOVq5WBkqqwRV8,258
4
+ fustor_source_elasticsearch-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ fustor_source_elasticsearch-0.1.4.dist-info/entry_points.txt,sha256=6ra2kkIbMjl9GGlMiCZOEmQauXZ69eotLbhw6R19LWM,95
6
+ fustor_source_elasticsearch-0.1.4.dist-info/top_level.txt,sha256=5yN8-ZDiTMdXAdqhUizGREqzj9bvxRz0MUZ_96bDzfg,28
7
+ fustor_source_elasticsearch-0.1.4.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [fustor_agent.drivers.sources]
2
+ elasticsearch = fustor_source_elasticsearch:ElasticsearchDriver
@@ -0,0 +1 @@
1
+ fustor_source_elasticsearch