fustor-source-elasticsearch 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fustor_source_elasticsearch/__init__.py +311 -0
- fustor_source_elasticsearch/py.typed +0 -0
- fustor_source_elasticsearch-0.1.4.dist-info/METADATA +8 -0
- fustor_source_elasticsearch-0.1.4.dist-info/RECORD +7 -0
- fustor_source_elasticsearch-0.1.4.dist-info/WHEEL +5 -0
- fustor_source_elasticsearch-0.1.4.dist-info/entry_points.txt +2 -0
- fustor_source_elasticsearch-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Fuagent source driver for Elasticsearch.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Any, Dict, Iterator, Tuple
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from elasticsearch import Elasticsearch, AsyncElasticsearch, AuthenticationException, AuthorizationException
|
|
10
|
+
|
|
11
|
+
from fustor_core.drivers import SourceDriver
|
|
12
|
+
from fustor_core.models.config import SourceConfig, PasswdCredential, ApiKeyCredential
|
|
13
|
+
from fustor_core.exceptions import DriverError
|
|
14
|
+
from fustor_event_model.models import EventBase, InsertEvent
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger("fustor_agent.driver.elasticsearch")
|
|
17
|
+
|
|
18
|
+
class ElasticsearchDriver(SourceDriver):
    """Source driver that reads documents from an Elasticsearch index.

    Instances are shared: constructing the driver twice with the same URI and
    the same credential (compared through ``hash(str(credential))``) returns
    the same object, and only the first construction runs ``__init__``.

    Required ``driver_params`` for the iterators are ``index_name`` and
    ``timestamp_field``; ``polling_interval_sec`` (default 5) is optional and
    only used by :meth:`get_message_iterator`.
    """

    # Shared instances keyed by "<uri>#<hash of str(credential)>".
    _instances: Dict[str, 'ElasticsearchDriver'] = {}
    # Guards lookups and insertions in ``_instances``.
    _lock = threading.Lock()

    def __new__(cls, id: str, config: SourceConfig):
        """Return the shared instance for this URI/credential pair, creating it if needed."""
        # Generate unique signature: URI + credential to ensure permission isolation
        signature = f"{config.uri}#{hash(str(config.credential))}"

        with ElasticsearchDriver._lock:
            if signature not in ElasticsearchDriver._instances:
                instance = super().__new__(cls)
                ElasticsearchDriver._instances[signature] = instance
            return ElasticsearchDriver._instances[signature]

    def __init__(self, id: str, config: SourceConfig):
        """Initialise the driver once; later constructions of a shared instance are no-ops."""
        # Prevent re-initialization of shared instances
        if hasattr(self, '_initialized'):
            return

        super().__init__(id, config)
        self.uri = self.config.uri
        self.credential = self.config.credential
        self.driver_params = self.config.driver_params

        self._initialized = True

    @staticmethod
    def _build_auth_kwargs(credential: Any) -> Dict[str, Any]:
        """Translate a credential object into client keyword arguments.

        Returns a dict with ``basic_auth`` (a ``(user, password)`` tuple or
        ``None``) and ``api_key`` (a string or ``None``). Credentials of any
        other type, or with an empty user/key, produce no authentication.
        """
        basic_auth = None
        api_key = None
        if isinstance(credential, PasswdCredential) and credential.user:
            basic_auth = (credential.user, credential.passwd or '')
        elif isinstance(credential, ApiKeyCredential) and credential.key:
            api_key = credential.key
        return {"basic_auth": basic_auth, "api_key": api_key}

    def _get_es_client(self) -> Elasticsearch:
        """Create a synchronous client for this driver's URI and credential."""
        return self._get_sync_es_client(self.uri, self.credential)

    @staticmethod
    def _get_sync_es_client(uri: str, credential: Any) -> Elasticsearch:
        """Create a synchronous Elasticsearch client for ``uri``."""
        return Elasticsearch(
            hosts=[uri],
            **ElasticsearchDriver._build_auth_kwargs(credential),
        )

    @staticmethod
    async def _get_async_es_client(uri: str, credential: Any) -> AsyncElasticsearch:
        """Create an asynchronous Elasticsearch client for ``uri``.

        Declared ``async`` so callers ``await`` it, although construction
        itself does not await anything.
        """
        return AsyncElasticsearch(
            hosts=[uri],
            **ElasticsearchDriver._build_auth_kwargs(credential),
        )

    def get_snapshot_iterator(self, **kwargs) -> Iterator[EventBase]:
        """Yield the current content of the index as batches of ``InsertEvent``.

        The index is read through a point in time (PIT) and paged with
        ``search_after``, ordered by the timestamp field.

        Args:
            **kwargs: ``batch_size`` (default 100) sets the page size.

        Yields:
            One ``InsertEvent`` per page; every event carries the same
            ``index`` value, the wall-clock time (ms) at which the snapshot
            started.

        Raises:
            DriverError: if ``index_name`` or ``timestamp_field`` is missing
                from the driver parameters.
        """
        index_name = self.driver_params.get("index_name")
        timestamp_field = self.driver_params.get("timestamp_field")
        if not index_name or not timestamp_field:
            raise DriverError("'index_name' and 'timestamp_field' are required driver parameters.")

        batch_size = kwargs.get("batch_size", 100)
        client = self._get_es_client()
        logger.info(f"Starting snapshot for Elasticsearch index '{index_name}'.")

        try:
            pit_id = client.open_point_in_time(index=index_name, keep_alive="1m")['id']
            try:
                search_after = None
                snapshot_time = int(datetime.now().timestamp() * 1000)
                while True:
                    # A search that uses a PIT must not name an index: the
                    # target is taken from the PIT and Elasticsearch rejects
                    # requests that specify both.
                    resp = client.search(
                        size=batch_size,
                        sort=[{timestamp_field: "asc"}, {"_doc": "asc"}],
                        pit={"id": pit_id, "keep_alive": "1m"},
                        search_after=search_after,
                    )
                    # The PIT id may change between requests; always continue
                    # (and finally close) with the most recent one.
                    pit_id = resp.get('pit_id') or pit_id

                    hits = resp['hits']['hits']
                    if not hits:
                        break

                    rows = [_normalize_doc(h) for h in hits]
                    fields = list(rows[0].keys())
                    yield InsertEvent(event_schema=index_name, table=index_name, rows=rows, fields=fields, index=snapshot_time)

                    search_after = hits[-1]['sort']
            finally:
                client.close_point_in_time(id=pit_id)
        finally:
            # Release the connection pool created for this snapshot.
            client.close()
        logger.info(f"Snapshot for Elasticsearch index '{index_name}' finished.")

    def is_position_available(self, position: int) -> bool:
        """
        Checks if the Elasticsearch position (timestamp) is available for resuming.
        Since Elasticsearch doesn't have the same concept of positions as databases,
        we generally consider positions available but may need to check if they're too old.
        """
        # For now, assume all positions are available as ES allows timestamp-based queries
        # TODO In the future, we might check if the position is older than retention period
        return True

    def get_message_iterator(self, start_position: int=-1, **kwargs) -> Iterator[EventBase]:
        """Poll the index for documents newer than ``start_position``.

        Args:
            start_position: epoch milliseconds to resume from; ``-1`` means
                "start from now".
            **kwargs: ``stop_event`` (a ``threading.Event``-like object) ends
                the loop when set; if omitted, a private event that is never
                set is used. ``batch_size`` (default 100) sets the page size.

        Returns:
            A generator yielding one ``UpdateEvent`` per document, whose
            ``index`` is the document timestamp in epoch milliseconds.

        Raises:
            DriverError: on first iteration, if ``index_name`` or
                ``timestamp_field`` is missing from the driver parameters.
        """
        # Function-scope imports: neither name is imported at module level.
        from datetime import timezone
        from fustor_event_model.models import UpdateEvent

        def _iterator_func() -> Iterator[EventBase]:
            index_name = self.driver_params.get("index_name")
            timestamp_field = self.driver_params.get("timestamp_field")
            polling_interval = self.driver_params.get("polling_interval_sec", 5)
            if not index_name or not timestamp_field:
                raise DriverError("'index_name' and 'timestamp_field' are required driver parameters.")

            stop_event = kwargs.get("stop_event")
            if stop_event is None:
                # No external stop signal supplied: fall back to an event that
                # is never set instead of failing on ``None.is_set()``.
                stop_event = threading.Event()
            batch_size = kwargs.get("batch_size", 100)

            if start_position != -1:
                start_timestamp = start_position
            else:
                start_timestamp = int(datetime.now().timestamp() * 1000)
            # Build the cursor in UTC with an explicit offset; a naive
            # local-time string would be skewed by the host's UTC offset.
            last_ts_iso = datetime.fromtimestamp(start_timestamp / 1000, tz=timezone.utc).isoformat()

            client = self._get_es_client()
            try:
                while not stop_event.is_set():
                    resp = client.search(
                        index=index_name,
                        query={"range": {timestamp_field: {"gt": last_ts_iso}}},
                        sort=[{timestamp_field: "asc"}],
                        size=batch_size,
                    )
                    hits = resp['hits']['hits']
                    if not hits:
                        stop_event.wait(timeout=polling_interval)
                        continue

                    cursor_before = last_ts_iso
                    for hit in hits:
                        if stop_event.is_set():
                            break
                        doc = _normalize_doc(hit)
                        ts_value = doc.get(timestamp_field)
                        if not ts_value:
                            continue
                        if isinstance(ts_value, (int, float)):
                            # Numeric timestamps are assumed to be epoch
                            # milliseconds - TODO confirm against the mapping.
                            dt = datetime.fromtimestamp(ts_value / 1000, tz=timezone.utc)
                        else:
                            dt = datetime.fromisoformat(str(ts_value).replace('Z', '+00:00'))
                            if dt.tzinfo is None:
                                # Offset-less timestamps are treated as UTC.
                                dt = dt.replace(tzinfo=timezone.utc)
                        current_ts_ms = int(dt.timestamp() * 1000)
                        yield UpdateEvent(event_schema=index_name, table=index_name, rows=[doc], fields=list(doc.keys()), index=current_ts_ms)
                        last_ts_iso = dt.isoformat()

                    if last_ts_iso == cursor_before:
                        # No hit advanced the cursor; wait before re-querying
                        # so the same page is not requested in a tight loop.
                        stop_event.wait(timeout=polling_interval)
            finally:
                # Release the connection pool created for this iterator.
                client.close()

        return _iterator_func()

    @classmethod
    async def get_available_fields(cls, **kwargs) -> Dict[str, Any]:
        """Return the flattened field mapping of the configured index.

        Args:
            **kwargs: ``uri``, ``credential`` and ``driver_params`` (with
                ``index_name``).

        Returns:
            ``{"properties": {<dotted field path>: {"type": <type>}}}``,
            always including ``_id`` and ``_index`` as ``keyword``.

        Raises:
            DriverError: if a required argument is missing or the mapping
                cannot be retrieved.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        driver_params = kwargs.get("driver_params", {})
        index_name = driver_params.get("index_name")

        if uri is None or credential_data is None or index_name is None:
            raise DriverError("'uri', 'credential', and 'driver_params.index_name' are required.")

        client = await cls._get_async_es_client(uri, credential_data)
        try:
            mapping = await client.indices.get_mapping(index=index_name)

            flat_properties = {}

            def flatten(props, prefix=''):
                # Object fields nest their children under "properties";
                # leaves are recorded under their dotted path.
                for key, value in props.items():
                    if "properties" in value:
                        flatten(value["properties"], f"{prefix}{key}.")
                    else:
                        flat_properties[f"{prefix}{key}"] = {"type": value.get("type", "object")}

            # The response is keyed by concrete index name, which differs from
            # ``index_name`` for aliases and wildcard patterns, so merge the
            # properties of every returned index.
            for index_mapping in mapping.values():
                flatten(index_mapping.get("mappings", {}).get("properties", {}))
            flat_properties["_id"] = {"type": "keyword"}
            flat_properties["_index"] = {"type": "keyword"}

            return {"properties": flat_properties}
        except Exception as e:
            logger.error(f"Failed to get available fields for ES index '{index_name}': {e}")
            raise DriverError(f"Failed to get mapping for index '{index_name}': {e}") from e
        finally:
            await client.close()

    @classmethod
    async def test_connection(cls, **kwargs) -> Tuple[bool, str]:
        """Ping the cluster and report whether it is reachable.

        Args:
            **kwargs: ``uri`` and ``credential``.

        Returns:
            ``(ok, message)``; never raises for connection problems.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        if uri is None or credential_data is None:
            return False, "'uri' and 'credential' are required."

        client = None
        try:
            client = await cls._get_async_es_client(uri, credential_data)
            if await client.ping():
                return True, "Successfully connected to Elasticsearch."
            else:
                return False, "Connection to Elasticsearch failed."
        except (AuthenticationException, AuthorizationException) as e:
            return False, f"Authentication/Authorization failed: {e}"
        except Exception as e:
            return False, f"An unexpected error occurred: {e}"
        finally:
            # ``client`` stays None when construction itself failed.
            if client is not None:
                await client.close()

    @classmethod
    async def check_privileges(cls, **kwargs) -> Tuple[bool, str]:
        """Check that the credential has ``read`` privilege on the index.

        Args:
            **kwargs: ``uri``, ``credential`` and ``driver_params`` (with
                ``index_name``).

        Returns:
            ``(ok, message)``. Any error from the security API is logged and
            reported as success, because the API may not be enabled.
        """
        uri = kwargs.get("uri")
        credential_data = kwargs.get("credential")
        driver_params = kwargs.get("driver_params", {})
        index_name = driver_params.get("index_name")

        if uri is None or credential_data is None or index_name is None:
            return False, "'uri', 'credential', and 'driver_params.index_name' are required."

        client = await cls._get_async_es_client(uri, credential_data)
        try:
            response = await client.security.has_privileges(
                body={"index": [{"names": [index_name], "privileges": ["read"]}]}
            )
            if response.get("has_all_requested"):
                return True, "User has sufficient privileges for the index."
            else:
                return False, f"User lacks 'read' privilege for index '{index_name}'."
        except Exception as e:
            # Deliberate best effort: an unavailable security API must not
            # block configuration.
            logger.warning(f"Could not verify privileges via security API (may not be enabled): {e}. Assuming success.")
            return True, "Could not verify privileges via security API; assuming success."
        finally:
            await client.close()

    @classmethod
    async def get_wizard_steps(cls) -> Dict[str, Any]:
        """Return the static description of the configuration wizard.

        The result has two steps (``connection`` and ``index_config``), each
        with a JSON-schema-style ``schema`` and a list of ``validations``,
        plus the shared credential schemas under ``components``.
        """
        return {
            "steps": [
                {
                    "step_id": "connection",
                    "title": "Connection Details",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "uri": {"type": "string", "title": "Elasticsearch URI"},
                            "credential": {
                                "type": "object",
                                "title": "Credentials",
                                "oneOf": [
                                    {"$ref": "#/components/schemas/PasswdCredential"},
                                    {"$ref": "#/components/schemas/ApiKeyCredential"}
                                ]
                            }
                        },
                        "required": ["uri", "credential"]
                    },
                    "validations": ["test_connection"]
                },
                {
                    "step_id": "index_config",
                    "title": "Index Configuration",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "driver_params": {
                                "type": "object",
                                "title": "Driver Parameters",
                                "properties": {
                                    "index_name": {"type": "string", "title": "Index Name"},
                                    "timestamp_field": {"type": "string", "title": "Timestamp Field"}
                                },
                                "required": ["index_name", "timestamp_field"]
                            }
                        },
                        "required": ["driver_params"]
                    },
                    "validations": ["check_privileges", "discover_fields_no_cache"]
                }
            ],
            "components": {
                "schemas": {
                    "PasswdCredential": {
                        "type": "object",
                        "title": "Username/Password",
                        "properties": {
                            "user": {"type": "string", "title": "Username"},
                            "passwd": {"type": "string", "title": "Password", "format": "password"}
                        },
                        "required": ["user"]
                    },
                    "ApiKeyCredential": {
                        "type": "object",
                        "title": "API Key",
                        "properties": {
                            "key": {"type": "string", "title": "API Key", "format": "password"}
                        },
                        "required": ["key"]
                    }
                }
            }
        }
|
|
306
|
+
|
|
307
|
+
def _normalize_doc(hit: Dict[str, Any]) -> Dict[str, Any]:
|
|
308
|
+
doc = hit.get('_source', {})
|
|
309
|
+
doc['_id'] = hit.get('_id')
|
|
310
|
+
doc['_index'] = hit.get('_index')
|
|
311
|
+
return doc
|
|
File without changes
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fustor-source-elasticsearch
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: An Elasticsearch source for Fustor Agent
|
|
5
|
+
Author-email: Huajin Wang <wanghuajin999@163.com>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: elasticsearch>=9.2.0
|
|
8
|
+
Requires-Dist: fustor-core
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
fustor_source_elasticsearch/__init__.py,sha256=ww7iz8OK1XrzM9XxBOqTx8hd9fa35lDm5MZrLUl7Boc,13836
|
|
2
|
+
fustor_source_elasticsearch/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
fustor_source_elasticsearch-0.1.4.dist-info/METADATA,sha256=xC3-coD6pANIVGITzw52Uv6uOoYwHXOVq5WBkqqwRV8,258
|
|
4
|
+
fustor_source_elasticsearch-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
+
fustor_source_elasticsearch-0.1.4.dist-info/entry_points.txt,sha256=6ra2kkIbMjl9GGlMiCZOEmQauXZ69eotLbhw6R19LWM,95
|
|
6
|
+
fustor_source_elasticsearch-0.1.4.dist-info/top_level.txt,sha256=5yN8-ZDiTMdXAdqhUizGREqzj9bvxRz0MUZ_96bDzfg,28
|
|
7
|
+
fustor_source_elasticsearch-0.1.4.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fustor_source_elasticsearch
|