sentry-nodestore-elastic 1.0.2__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sentry_nodestore_elastic-1.0.2/sentry_nodestore_elastic.egg-info → sentry_nodestore_elastic-1.1.0}/PKG-INFO +8 -7
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/README.md +3 -3
- sentry_nodestore_elastic-1.1.0/sentry_nodestore_elastic/backend.py +534 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0/sentry_nodestore_elastic.egg-info}/PKG-INFO +8 -7
- sentry_nodestore_elastic-1.1.0/sentry_nodestore_elastic.egg-info/requires.txt +2 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/setup.py +3 -3
- sentry_nodestore_elastic-1.0.2/sentry_nodestore_elastic/backend.py +0 -241
- sentry_nodestore_elastic-1.0.2/sentry_nodestore_elastic.egg-info/requires.txt +0 -2
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/LICENSE +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/MANIFEST.in +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/sentry_nodestore_elastic/__init__.py +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/sentry_nodestore_elastic.egg-info/SOURCES.txt +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/sentry_nodestore_elastic.egg-info/dependency_links.txt +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/sentry_nodestore_elastic.egg-info/not-zip-safe +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/sentry_nodestore_elastic.egg-info/top_level.txt +0 -0
- {sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/setup.cfg +0 -0
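Before the per-file diffs, a minimal sketch of what this release is used for: wiring the backend into Sentry's configuration. This is not taken from the diff below; the setting names follow Sentry's nodestore convention, and the host and option values are illustrative assumptions.

``` python
# sentry.conf.py -- hypothetical wiring for sentry-nodestore-elastic 1.1.0.
# The cluster URL is a placeholder; refresh/alias_name mirror the defaults
# of ElasticNodeStorage.__init__ in the backend.py diff further down.
from elasticsearch import Elasticsearch

SENTRY_NODESTORE = "sentry_nodestore_elastic.ElasticNodeStorage"
SENTRY_NODESTORE_OPTIONS = {
    "es": Elasticsearch("http://elasticsearch:9200"),
    "refresh": False,        # skip per-write index refresh for throughput
    "alias_name": "sentry",  # alias used for reads across dated indices
}
```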
{sentry_nodestore_elastic-1.0.2/sentry_nodestore_elastic.egg-info → sentry_nodestore_elastic-1.1.0}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: sentry-nodestore-elastic
-Version: 1.0.2
+Version: 1.1.0
 Summary: Sentry nodestore Elasticsearch backend
 Home-page: https://github.com/andrsp/sentry-nodestore-elastic
 Author: andrsp@gmail.com
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python
 Classifier: Operating System :: OS Independent
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sentry
-Requires-Dist: elasticsearch
+Requires-Dist: sentry<27.0.0,>=26.1.0
+Requires-Dist: elasticsearch<9.0.0,>=8.0.0
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -29,6 +29,7 @@ Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: keywords
 Dynamic: license
+Dynamic: license-file
 Dynamic: project-url
 Dynamic: requires-dist
 Dynamic: summary
@@ -39,7 +40,7 @@ Sentry nodestore Elasticsearch backend

 [](https://pypi.python.org/pypi/sentry-nodestore-elastic)

-Supported Sentry
+Supported Sentry 26.1.0+ & elasticsearch 8.x versions

 Use Elasticsearch cluster for store node objects from Sentry

@@ -56,7 +57,7 @@ Switching nodestore to dedicated Elasticsearch cluster provides more scalability
 Rebuild sentry docker image with nodestore package installation

 ``` shell
-FROM getsentry/sentry:
+FROM getsentry/sentry:26.1.0
 RUN pip install sentry-nodestore-elastic
 ```

@@ -205,7 +206,7 @@ while True:

     bulk(es, bulk_data)
     count = count - 2000
-    print(f"
+    print(f"Remaining rows: {count}")

 cursor.close()
 conn.close()

{sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/README.md

@@ -4,7 +4,7 @@ Sentry nodestore Elasticsearch backend

 [](https://pypi.python.org/pypi/sentry-nodestore-elastic)

-Supported Sentry
+Supported Sentry 26.1.0+ & elasticsearch 8.x versions

 Use Elasticsearch cluster for store node objects from Sentry

@@ -21,7 +21,7 @@ Switching nodestore to dedicated Elasticsearch cluster provides more scalability
 Rebuild sentry docker image with nodestore package installation

 ``` shell
-FROM getsentry/sentry:
+FROM getsentry/sentry:26.1.0
 RUN pip install sentry-nodestore-elastic
 ```

@@ -170,7 +170,7 @@ while True:

     bulk(es, bulk_data)
     count = count - 2000
-    print(f"
+    print(f"Remaining rows: {count}")

 cursor.close()
 conn.close()

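The README hunks above touch only the tail of the package's Postgres-to-Elasticsearch migration script (the `bulk(es, bulk_data)` loop), so here is a hedged sketch of the loop's overall shape. The table name follows Sentry's Django nodestore schema; the DSN, cluster URL, and field mapping are assumptions, not content from this diff.

``` python
# Hypothetical shape of the README's migration loop; the diff shows only
# its tail. Streams rows from Sentry's Postgres nodestore into
# Elasticsearch in batches of 2000.
import psycopg2
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

es = Elasticsearch("http://localhost:9200")           # placeholder URL
conn = psycopg2.connect("dbname=sentry user=sentry")  # placeholder DSN
cursor = conn.cursor()
cursor.execute("SELECT id, data, timestamp FROM nodestore_node")
count = cursor.rowcount

while True:
    rows = cursor.fetchmany(2000)
    if not rows:
        break
    bulk_data = [
        {"_index": "sentry", "_id": node_id, "data": data, "timestamp": ts}
        for node_id, data, ts in rows
    ]
    bulk(es, bulk_data)
    count = count - 2000
    print(f"Remaining rows: {count}")

cursor.close()
conn.close()
```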
sentry_nodestore_elastic-1.1.0/sentry_nodestore_elastic/backend.py (new file)

@@ -0,0 +1,534 @@
+import base64
+from datetime import datetime, timezone
+import logging
+import re
+import zlib
+from typing import Optional, List, Any
+import elasticsearch
+from elasticsearch import Elasticsearch
+from sentry.nodestore.base import NodeStorage
+
+logger = logging.getLogger("sentry.nodestore.elastic")
+
+
+class ElasticNodeStorage(NodeStorage):
+    """
+    Elasticsearch backend for Sentry nodestore.
+
+    This backend stores Sentry node objects in Elasticsearch instead of PostgreSQL,
+    providing better scalability and performance for high-load environments.
+    """
+
+    logger = logger
+    encoding = 'utf-8'
+
+    # Index name pattern for date-based indices
+    INDEX_DATE_PATTERN = re.compile(r'^sentry-(\d{4}-\d{2}-\d{2})')
+
+    def __init__(
+        self,
+        es: Elasticsearch,
+        index: str = 'sentry-{date}',
+        refresh: bool = False,
+        template_name: str = 'sentry',
+        alias_name: str = 'sentry',
+        validate_es: bool = False,
+    ) -> None:
+        """
+        Initialize Elasticsearch nodestore backend.
+
+        Args:
+            es: Elasticsearch client instance
+            index: Index name pattern with {date} placeholder (default: 'sentry-{date}')
+            refresh: Whether to refresh index after writes (default: False for better performance)
+            template_name: Name of the index template (default: 'sentry')
+            alias_name: Name of the index alias (default: 'sentry')
+            validate_es: Whether to validate Elasticsearch connection on init (default: False)
+        """
+        if not isinstance(es, Elasticsearch):
+            raise TypeError("es parameter must be an Elasticsearch client instance")
+
+        self.es = es
+        self.index = index
+        self.refresh = refresh
+        self.template_name = template_name
+        self.alias_name = alias_name
+        self.validate_es = validate_es
+
+        if self.validate_es:
+            try:
+                self.es.info()
+            except Exception as e:
+                raise ConnectionError(f"Failed to connect to Elasticsearch: {e}") from e
+
+        super(ElasticNodeStorage, self).__init__()
+
+    def bootstrap(self) -> None:
+        """
+        Bootstrap Elasticsearch index template.
+
+        Creates an index template if it doesn't exist. Does not overwrite
+        existing templates to allow manual customization.
+        """
+        try:
+            # Do not overwrite existing template with same name
+            # It may have been changed in elastic manually after creation
+            # or created manually before sentry initialization
+            self.es.indices.get_index_template(name=self.template_name)
+            self.logger.info(
+                "bootstrap.template.check",
+                extra={
+                    "template": self.template_name,
+                    "status": "exists"
+                }
+            )
+        except elasticsearch.exceptions.NotFoundError:
+            self.logger.info(
+                "bootstrap.template.check",
+                extra={
+                    "template": self.template_name,
+                    "status": "not found"
+                }
+            )
+            try:
+                self.es.indices.put_index_template(
+                    create=True,
+                    name=self.template_name,
+                    index_patterns=["sentry-*"],
+                    template={
+                        "settings": {
+                            "index": {
+                                "number_of_shards": 3,
+                                "number_of_replicas": 0
+                            }
+                        },
+                        "mappings": {
+                            "_source": {
+                                "enabled": False
+                            },
+                            "dynamic": "false",
+                            "dynamic_templates": [],
+                            "properties": {
+                                "data": {
+                                    "type": "text",
+                                    "index": False,
+                                    "store": True
+                                },
+                                "timestamp": {
+                                    "type": "date",
+                                    "store": True
+                                }
+                            }
+                        },
+                        "aliases": {
+                            self.alias_name: {}
+                        }
+                    }
+                )
+                self.logger.info(
+                    "bootstrap.template.create",
+                    extra={
+                        "template": self.template_name,
+                        "alias": self.alias_name
+                    }
+                )
+            except elasticsearch.exceptions.RequestError as e:
+                self.logger.error(
+                    "bootstrap.template.create.error",
+                    extra={
+                        "template": self.template_name,
+                        "error": str(e)
+                    },
+                    exc_info=True
+                )
+                raise
+
+    def _get_write_index(self) -> str:
+        """Get the index name for writing based on current date."""
+        return self.index.format(date=datetime.now(timezone.utc).strftime('%Y-%m-%d'))
+
+    def _get_read_index(self, id: str) -> Optional[str]:
+        """
+        Get the index name containing the document with given ID.
+
+        Optimized to use direct get through alias instead of search query.
+        Falls back to search if direct get fails (for backward compatibility).
+
+        Args:
+            id: Document ID to find
+
+        Returns:
+            Index name containing the document, or None if not found
+        """
+        # Try direct get through alias first (more efficient)
+        try:
+            # Use _source: false and stored_fields to avoid loading document data
+            response = self.es.get(
+                id=id,
+                index=self.alias_name,
+                _source=False,
+                stored_fields="_none_"
+            )
+            return response.get('_index')
+        except elasticsearch.exceptions.NotFoundError:
+            return None
+        except elasticsearch.exceptions.RequestError:
+            # Fallback to search if direct get fails (e.g., alias routing issues)
+            try:
+                search = self.es.search(
+                    index=self.alias_name,
+                    body={
+                        "query": {
+                            "term": {
+                                "_id": id
+                            }
+                        },
+                        "size": 1,
+                        "_source": False
+                    }
+                )
+                if search["hits"]["total"]["value"] == 1:
+                    return search["hits"]["hits"][0]["_index"]
+            except Exception as e:
+                self.logger.warning(
+                    "document.get_index.error",
+                    extra={
+                        "doc_id": id,
+                        "error": str(e)
+                    }
+                )
+            return None
+
+    def _compress(self, data: bytes) -> str:
+        """
+        Compress and encode data for storage.
+
+        Args:
+            data: Raw bytes to compress
+
+        Returns:
+            Base64-encoded compressed string
+        """
+        if not isinstance(data, bytes):
+            raise TypeError(f"data must be bytes, got {type(data)}")
+        return base64.b64encode(zlib.compress(data)).decode(self.encoding)
+
+    def _decompress(self, data: str) -> bytes:
+        """
+        Decompress and decode data from storage.
+
+        Args:
+            data: Base64-encoded compressed string
+
+        Returns:
+            Decompressed bytes
+        """
+        if not isinstance(data, str):
+            raise TypeError(f"data must be str, got {type(data)}")
+        try:
+            return zlib.decompress(base64.b64decode(data))
+        except (ValueError, zlib.error) as e:
+            raise ValueError(f"Failed to decompress data: {e}") from e
+
+    def delete(self, id: str) -> None:
+        """
+        Delete a node by ID.
+
+        Args:
+            id: Document ID to delete
+
+        Example:
+            >>> nodestore.delete('key1')
+        """
+        if not id:
+            raise ValueError("id cannot be empty")
+
+        try:
+            # Use direct delete instead of delete_by_query for better performance
+            index = self._get_read_index(id)
+            if index:
+                self.es.delete(id=id, index=index, refresh=self.refresh)
+            else:
+                # Fallback to delete_by_query if index not found
+                self.es.delete_by_query(
+                    index=self.alias_name,
+                    query={
+                        "term": {
+                            "_id": id
+                        }
+                    }
+                )
+            self.logger.info(
+                "document.delete.executed",
+                extra={
+                    "doc_id": id
+                }
+            )
+        except elasticsearch.exceptions.NotFoundError:
+            # Document doesn't exist, which is fine
+            pass
+        except elasticsearch.exceptions.ConflictError:
+            # Concurrent deletion, which is fine
+            pass
+        except Exception as e:
+            self.logger.error(
+                "document.delete.error",
+                extra={
+                    "doc_id": id,
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
+
+    def delete_multi(self, id_list: List[str]) -> None:
+        """
+        Delete multiple nodes.
+
+        Note: This is not guaranteed to be atomic and may result in a partial
+        delete.
+
+        Args:
+            id_list: List of document IDs to delete
+
+        Example:
+            >>> delete_multi(['key1', 'key2'])
+        """
+        if not id_list:
+            return
+
+        if not isinstance(id_list, list):
+            raise TypeError(f"id_list must be a list, got {type(id_list)}")
+
+        try:
+            response = self.es.delete_by_query(
+                index=self.alias_name,
+                query={
+                    "ids": {
+                        "values": id_list
+                    }
+                },
+                refresh=self.refresh
+            )
+            self.logger.info(
+                "document.delete_multi.executed",
+                extra={
+                    "docs_to_delete": len(id_list),
+                    "docs_deleted": response.get("deleted", 0)
+                }
+            )
+        except elasticsearch.exceptions.NotFoundError:
+            # Indices don't exist, which is fine
+            pass
+        except elasticsearch.exceptions.ConflictError:
+            # Concurrent deletion, which is fine
+            pass
+        except Exception as e:
+            self.logger.error(
+                "document.delete_multi.error",
+                extra={
+                    "docs_to_delete": len(id_list),
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
+
+
+    def _get_bytes(self, id: str) -> Optional[bytes]:
+        """
+        Get raw bytes for a node by ID.
+
+        Args:
+            id: Document ID to retrieve
+
+        Returns:
+            Decompressed bytes, or None if not found
+
+        Example:
+            >>> nodestore._get_bytes('key1')
+            b'{"message": "hello world"}'
+        """
+        if not id:
+            return None
+
+        index = self._get_read_index(id)
+
+        if index is not None:
+            try:
+                response = self.es.get(id=id, index=index, stored_fields=["data"])
+                if 'fields' in response and 'data' in response['fields']:
+                    return self._decompress(response['fields']['data'][0])
+                else:
+                    self.logger.warning(
+                        "document.get.warning",
+                        extra={
+                            "doc_id": id,
+                            "index": index,
+                            "error": "data field not found in response"
+                        }
+                    )
+                    return None
+            except elasticsearch.exceptions.NotFoundError:
+                return None
+            except Exception as e:
+                self.logger.error(
+                    "document.get.error",
+                    extra={
+                        "doc_id": id,
+                        "index": index,
+                        "error": str(e)
+                    },
+                    exc_info=True
+                )
+                return None
+        else:
+            self.logger.warning(
+                "document.get.warning",
+                extra={
+                    "doc_id": id,
+                    "error": "index containing doc_id not found"
+                }
+            )
+            return None
+
+
+    def _set_bytes(self, id: str, data: bytes, ttl: Optional[int] = None) -> None:
+        """
+        Set raw bytes for a node by ID.
+
+        Args:
+            id: Document ID
+            data: Raw bytes to store
+            ttl: Time to live in seconds (not currently used, reserved for future use)
+
+        Example:
+            >>> nodestore._set_bytes('key1', b"{'foo': 'bar'}")
+        """
+        if not id:
+            raise ValueError("id cannot be empty")
+
+        if not isinstance(data, bytes):
+            raise TypeError(f"data must be bytes, got {type(data)}")
+
+        index = self._get_write_index()
+        try:
+            self.es.index(
+                id=id,
+                index=index,
+                document={
+                    'data': self._compress(data),
+                    'timestamp': datetime.now(timezone.utc).isoformat()
+                },
+                refresh=self.refresh,
+            )
+        except Exception as e:
+            self.logger.error(
+                "document.set.error",
+                extra={
+                    "doc_id": id,
+                    "index": index,
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
+
+    def cleanup(self, cutoff: datetime) -> None:
+        """
+        Clean up indices older than the cutoff date.
+
+        Args:
+            cutoff: Datetime threshold - indices older than this will be deleted
+        """
+        if not isinstance(cutoff, datetime):
+            raise TypeError(f"cutoff must be a datetime object, got {type(cutoff)}")
+
+        # Ensure cutoff is timezone-aware
+        if cutoff.tzinfo is None:
+            cutoff = cutoff.replace(tzinfo=timezone.utc)
+
+        try:
+            alias_indices = self.es.indices.get_alias(index=self.alias_name)
+        except elasticsearch.exceptions.NotFoundError:
+            self.logger.warning(
+                "cleanup.alias.not_found",
+                extra={
+                    "alias": self.alias_name
+                }
+            )
+            return
+
+        deleted_count = 0
+        skipped_count = 0
+
+        for index in alias_indices:
+            # Parse date from index name using regex for more robust parsing
+            # Handles indices with postfixes like '-fixed' or '-reindex'
+            match = self.INDEX_DATE_PATTERN.match(index)
+            if not match:
+                self.logger.warning(
+                    "cleanup.index.skip",
+                    extra={
+                        "index": index,
+                        "reason": "index name does not match expected pattern"
+                    }
+                )
+                skipped_count += 1
+                continue
+
+            try:
+                index_date_str = match.group(1)
+                index_ts = datetime.strptime(index_date_str, "%Y-%m-%d").replace(
+                    tzinfo=timezone.utc
+                )
+
+                if index_ts < cutoff:
+                    try:
+                        self.es.indices.delete(index=index)
+                        deleted_count += 1
+                        self.logger.info(
+                            "index.delete.executed",
+                            extra={
+                                "index": index,
+                                "index_ts": index_ts.timestamp(),
+                                "cutoff_ts": cutoff.timestamp(),
+                                "status": "deleted"
+                            }
+                        )
+                    except elasticsearch.exceptions.NotFoundError:
+                        self.logger.info(
+                            "index.delete.error",
+                            extra={
+                                "index": index,
+                                "error": "not found"
+                            }
+                        )
+                    except Exception as e:
+                        self.logger.error(
+                            "index.delete.error",
+                            extra={
+                                "index": index,
+                                "error": str(e)
+                            },
+                            exc_info=True
+                        )
+            except ValueError as e:
+                self.logger.warning(
+                    "cleanup.index.skip",
+                    extra={
+                        "index": index,
+                        "reason": f"failed to parse date: {e}"
+                    }
+                )
+                skipped_count += 1
+
+        self.logger.info(
+            "cleanup.completed",
+            extra={
+                "cutoff_ts": cutoff.timestamp(),
+                "deleted_count": deleted_count,
+                "skipped_count": skipped_count,
+                "total_checked": len(alias_indices)
+            }
+        )

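To make the rewritten backend concrete, a minimal round-trip sketch against a local cluster. It assumes a reachable Elasticsearch and calls the private helpers exactly as defined above; the id, payload, and URL are illustrative.

``` python
# Round-trip through the 1.1.0 backend: bootstrap the template/alias,
# write a compressed node, read it back, delete it.
from elasticsearch import Elasticsearch
from sentry_nodestore_elastic.backend import ElasticNodeStorage

store = ElasticNodeStorage(
    es=Elasticsearch("http://localhost:9200"),  # placeholder URL
    refresh=True,      # make writes visible immediately for this demo
    validate_es=True,  # new in 1.1.0: fail fast if the cluster is down
)
store.bootstrap()      # creates the 'sentry' template and alias if missing
store._set_bytes("key1", b'{"message": "hello world"}')
assert store._get_bytes("key1") == b'{"message": "hello world"}'
store.delete("key1")
```

Compared with 1.0.2, reads resolve the owning index with a direct get through the alias instead of a search, and deletes prefer a targeted delete over delete_by_query.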
{sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0/sentry_nodestore_elastic.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: sentry-nodestore-elastic
-Version: 1.0.2
+Version: 1.1.0
 Summary: Sentry nodestore Elasticsearch backend
 Home-page: https://github.com/andrsp/sentry-nodestore-elastic
 Author: andrsp@gmail.com
@@ -19,8 +19,8 @@ Classifier: Programming Language :: Python
 Classifier: Operating System :: OS Independent
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sentry
-Requires-Dist: elasticsearch
+Requires-Dist: sentry<27.0.0,>=26.1.0
+Requires-Dist: elasticsearch<9.0.0,>=8.0.0
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -29,6 +29,7 @@ Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: keywords
 Dynamic: license
+Dynamic: license-file
 Dynamic: project-url
 Dynamic: requires-dist
 Dynamic: summary
@@ -39,7 +40,7 @@ Sentry nodestore Elasticsearch backend

 [](https://pypi.python.org/pypi/sentry-nodestore-elastic)

-Supported Sentry
+Supported Sentry 26.1.0+ & elasticsearch 8.x versions

 Use Elasticsearch cluster for store node objects from Sentry

@@ -56,7 +57,7 @@ Switching nodestore to dedicated Elasticsearch cluster provides more scalability
 Rebuild sentry docker image with nodestore package installation

 ``` shell
-FROM getsentry/sentry:
+FROM getsentry/sentry:26.1.0
 RUN pip install sentry-nodestore-elastic
 ```

@@ -205,7 +206,7 @@ while True:

     bulk(es, bulk_data)
     count = count - 2000
-    print(f"
+    print(f"Remaining rows: {count}")

 cursor.close()
 conn.close()

{sentry_nodestore_elastic-1.0.2 → sentry_nodestore_elastic-1.1.0}/setup.py

@@ -1,8 +1,8 @@
 from setuptools import setup, find_namespace_packages

 install_requires = [
-    'sentry
-    'elasticsearch
+    'sentry>=26.1.0,<27.0.0',
+    'elasticsearch>=8.0.0,<9.0.0',
 ]

 with open("README.md", "r") as readme:
@@ -10,7 +10,7 @@ with open("README.md", "r") as readme:

 setup(
     name='sentry-nodestore-elastic',
-    version='1.0.2',
+    version='1.1.0',
     author='andrsp@gmail.com',
     author_email='andrsp@gmail.com',
     url='https://github.com/andrsp/sentry-nodestore-elastic',

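The dependency pins are the substance of this release: the previously loose `sentry` and `elasticsearch` requirements become explicit ranges. A small sketch, not part of the package, that checks an installed environment against the new ranges before rebuilding the Sentry image; it assumes the `packaging` library is available.

``` python
# Verify installed versions against the 1.1.0 pins from setup.py.
from importlib.metadata import version
from packaging.version import Version

PINS = {"sentry": ("26.1.0", "27.0.0"), "elasticsearch": ("8.0.0", "9.0.0")}

for pkg, (lo, hi) in PINS.items():
    v = Version(version(pkg))
    assert Version(lo) <= v < Version(hi), f"{pkg} {v} outside >={lo},<{hi}"
    print(f"{pkg} {v} satisfies >={lo},<{hi}")
```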
sentry_nodestore_elastic-1.0.2/sentry_nodestore_elastic/backend.py (removed)

@@ -1,241 +0,0 @@
-import base64
-from datetime import datetime, timezone
-import logging
-import zlib
-import elasticsearch
-from sentry.nodestore.base import NodeStorage
-
-class ElasticNodeStorage(NodeStorage):
-    logger = logging.getLogger("sentry.nodestore.elastic")
-
-    encoding = 'utf-8'
-
-    def __init__(
-        self,
-        es,
-        index='sentry-{date}',
-        refresh=False,
-        template_name='sentry',
-        alias_name='sentry',
-        validate_es=False,
-    ):
-        self.es = es
-        self.index = index
-        self.refresh = refresh
-        self.template_name = template_name
-        self.alias_name = alias_name
-        self.validate_es = validate_es
-
-        super(ElasticNodeStorage, self).__init__()
-
-    def bootstrap(self):
-        try:
-            # do not owerwrite existing template with same name
-            # it may have been changed in elastic manually after creation
-            # or created manually before sentry initialization
-            self.es.indices.get_index_template(name=self.template_name)
-            self.logger.info(
-                "bootstrap.template.check",
-                extra={
-                    "template": self.template_name,
-                    "status": "exists"
-                }
-            )
-        except elasticsearch.exceptions.NotFoundError:
-            self.logger.info(
-                "bootstrap.template.check",
-                extra={
-                    "template": self.template_name,
-                    "status": "not found"
-                }
-            )
-            self.es.indices.put_index_template(
-                create = True,
-                name = self.template_name,
-                index_patterns = [
-                    "sentry-*"
-                ],
-                template = {
-                    "settings": {
-                        "index": {
-                            "number_of_shards": 3,
-                            "number_of_replicas": 0
-                        }
-                    },
-                    "mappings": {
-                        "_source": {
-                            "enabled": False
-                        },
-                        "dynamic": "false",
-                        "dynamic_templates": [],
-                        "properties": {
-                            "data": {
-                                "type": "text",
-                                "index": False,
-                                "store": True
-                            },
-                            "timestamp": {
-                                "type": "date",
-                                "store": True
-                            }
-                        }
-                    },
-                    "aliases": {
-                        self.alias_name: {}
-                    }
-                }
-            )
-            self.logger.info(
-                "bootstrap.template.create",
-                extra={
-                    "template": self.template_name,
-                    "alias": self.alias_name
-                }
-            )
-
-    def _get_write_index(self):
-        return self.index.format(date=datetime.today().strftime('%Y-%m-%d'))
-
-    def _get_read_index(self, id):
-        search = self.es.search(
-            index=self.alias_name,
-            body={
-                "query": {
-                    "term": {
-                        "_id": id
-                    },
-                },
-            }
-        )
-        if search["hits"]["total"]["value"] == 1:
-            return search["hits"]["hits"][0]["_index"]
-        else:
-            return None
-
-    def _compress(self, data):
-        return base64.b64encode(zlib.compress(data)).decode(self.encoding)
-
-    def _decompress(self, data):
-        return zlib.decompress(base64.b64decode(data))
-
-    def delete(self, id):
-        """
-        >>> nodestore.delete('key1')
-        """
-
-        try:
-            self.logger.info(
-                "document.delete.executed",
-                extra={
-                    "doc_id": id
-                }
-            )
-            self.es.delete_by_query(
-                index=self.alias_name,
-                query = {
-                    "term": {
-                        "_id": id
-                    }
-                }
-            )
-        except elasticsearch.exceptions.NotFoundError:
-            pass
-        except elasticsearch.exceptions.ConflictError:
-            pass
-
-    def delete_multi(self, id_list):
-        """
-        Delete multiple nodes.
-        Note: This is not guaranteed to be atomic and may result in a partial
-        delete.
-        >>> delete_multi(['key1', 'key2'])
-        """
-
-        try:
-            response = self.es.delete_by_query(
-                index=self.alias_name,
-                query = {
-                    "ids": {
-                        "values": id_list
-                    }
-                }
-            )
-            self.logger.info(
-                "document.delete_multi.executed",
-                extra={
-                    "docs_to_delete": len(id_list),
-                    "docs_deleted": response["deleted"]
-                }
-            )
-        except elasticsearch.exceptions.NotFoundError:
-            pass
-        except elasticsearch.exceptions.ConflictError:
-            pass
-
-
-    def _get_bytes(self, id):
-        """
-        >>> nodestore._get_bytes('key1')
-        b'{"message": "hello world"}'
-        """
-        index = self._get_read_index(id)
-
-        if index is not None:
-            try:
-                response = self.es.get(id=id, index=index, stored_fields=["data"])
-            except elasticsearch.exceptions.NotFoundError:
-                return None
-            else:
-                return self._decompress(response['fields']['data'][0])
-        else:
-            self.logger.warning(
-                "document.get.warning",
-                extra={
-                    "doc_id": id,
-                    "error": "index containing doc_id not found"
-                }
-            )
-            return None
-
-
-    def _set_bytes(self, id, data, ttl=None):
-        """
-        >>> nodestore.set('key1', b"{'foo': 'bar'}")
-        """
-        index = self._get_write_index()
-        self.es.index(
-            id=id,
-            index=index,
-            document={'data': self._compress(data), 'timestamp': datetime.utcnow().isoformat()},
-            refresh=self.refresh,
-        )
-
-    def cleanup(self, cutoff: datetime):
-        for index in self.es.indices.get_alias(index=self.alias_name):
-            # parse date from manually changed indices after reindex
-            # (they may have postfixes like '-fixed' or '-reindex')
-            index_date = '-'.join(index.split('-')[1:4])
-            index_ts = datetime.strptime(index_date, "%Y-%m-%d").replace(
-                tzinfo=timezone.utc
-            )
-            if index_ts < cutoff:
-                try:
-                    self.es.indices.delete(index=index)
-                except elasticsearch.exceptions.NotFoundError:
-                    self.logger.info(
-                        "index.delete.error",
-                        extra={
-                            "index": index,
-                            "error": "not found"
-                        }
-                    )
-                else:
-                    self.logger.info(
-                        "index.delete.executed",
-                        extra={
-                            "index": index,
-                            "index_ts": index_ts.timestamp(),
-                            "cutoff_ts": cutoff.timestamp(),
-                            "status": "deleted"
-                        }
-                    )