sentry-nodestore-elastic 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- sentry_nodestore_elastic/backend.py +421 -128
- {sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info}/METADATA +19 -8
- sentry_nodestore_elastic-1.1.0.dist-info/RECORD +7 -0
- {sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info}/WHEEL +1 -1
- sentry_nodestore_elastic-1.0.1.dist-info/RECORD +0 -7
- {sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info/licenses}/LICENSE +0 -0
- {sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info}/top_level.txt +0 -0
sentry_nodestore_elastic/backend.py
@@ -1,37 +1,78 @@
 import base64
 from datetime import datetime, timezone
 import logging
+import re
 import zlib
+from typing import Optional, List, Any
 import elasticsearch
+from elasticsearch import Elasticsearch
 from sentry.nodestore.base import NodeStorage
 
-
-
+logger = logging.getLogger("sentry.nodestore.elastic")
+
 
+class ElasticNodeStorage(NodeStorage):
+    """
+    Elasticsearch backend for Sentry nodestore.
+
+    This backend stores Sentry node objects in Elasticsearch instead of PostgreSQL,
+    providing better scalability and performance for high-load environments.
+    """
+
+    logger = logger
     encoding = 'utf-8'
+
+    # Index name pattern for date-based indices
+    INDEX_DATE_PATTERN = re.compile(r'^sentry-(\d{4}-\d{2}-\d{2})')
 
     def __init__(
         self,
-        es,
-        index='sentry-{date}',
-        refresh=False,
-        template_name='sentry',
-        alias_name='sentry',
-        validate_es=False,
-    ):
+        es: Elasticsearch,
+        index: str = 'sentry-{date}',
+        refresh: bool = False,
+        template_name: str = 'sentry',
+        alias_name: str = 'sentry',
+        validate_es: bool = False,
+    ) -> None:
+        """
+        Initialize Elasticsearch nodestore backend.
+
+        Args:
+            es: Elasticsearch client instance
+            index: Index name pattern with {date} placeholder (default: 'sentry-{date}')
+            refresh: Whether to refresh index after writes (default: False for better performance)
+            template_name: Name of the index template (default: 'sentry')
+            alias_name: Name of the index alias (default: 'sentry')
+            validate_es: Whether to validate Elasticsearch connection on init (default: False)
+        """
+        if not isinstance(es, Elasticsearch):
+            raise TypeError("es parameter must be an Elasticsearch client instance")
+
         self.es = es
         self.index = index
         self.refresh = refresh
         self.template_name = template_name
         self.alias_name = alias_name
         self.validate_es = validate_es
+
+        if self.validate_es:
+            try:
+                self.es.info()
+            except Exception as e:
+                raise ConnectionError(f"Failed to connect to Elasticsearch: {e}") from e
 
         super(ElasticNodeStorage, self).__init__()
 
-    def bootstrap(self):
+    def bootstrap(self) -> None:
+        """
+        Bootstrap Elasticsearch index template.
+
+        Creates an index template if it doesn't exist. Does not overwrite
+        existing templates to allow manual customization.
+        """
         try:
-            #
-            #
+            # Do not overwrite existing template with same name
+            # It may have been changed in elastic manually after creation
            # or created manually before sentry initialization
            self.es.indices.get_index_template(name=self.template_name)
            self.logger.info(
@@ -49,144 +90,298 @@ class ElasticNodeStorage(NodeStorage):
                    "status": "not found"
                }
            )
-
-
-
-
-                "sentry-*"
-
-
-
-
-
-
-                }
-            },
-            "mappings": {
-                "_source": {
-                    "enabled": False
+            try:
+                self.es.indices.put_index_template(
+                    create=True,
+                    name=self.template_name,
+                    index_patterns=["sentry-*"],
+                    template={
+                        "settings": {
+                            "index": {
+                                "number_of_shards": 3,
+                                "number_of_replicas": 0
+                            }
                        },
-            "
-
-
-                "data": {
-                    "type": "text",
-                    "index": False,
-                    "store": True
+                        "mappings": {
+                            "_source": {
+                                "enabled": False
                            },
-            "
-
-
+                            "dynamic": "false",
+                            "dynamic_templates": [],
+                            "properties": {
+                                "data": {
+                                    "type": "text",
+                                    "index": False,
+                                    "store": True
+                                },
+                                "timestamp": {
+                                    "type": "date",
+                                    "store": True
+                                }
                            }
+                        },
+                        "aliases": {
+                            self.alias_name: {}
                        }
-            },
-            "aliases": {
-                self.alias_name: {}
                    }
-
-
-
-
-
-
-
-
-
+                )
+                self.logger.info(
+                    "bootstrap.template.create",
+                    extra={
+                        "template": self.template_name,
+                        "alias": self.alias_name
+                    }
+                )
+            except elasticsearch.exceptions.RequestError as e:
+                self.logger.error(
+                    "bootstrap.template.create.error",
+                    extra={
+                        "template": self.template_name,
+                        "error": str(e)
+                    },
+                    exc_info=True
+                )
+                raise
 
-    def _get_write_index(self):
-
+    def _get_write_index(self) -> str:
+        """Get the index name for writing based on current date."""
+        return self.index.format(date=datetime.now(timezone.utc).strftime('%Y-%m-%d'))
 
-    def _get_read_index(self, id):
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def _get_read_index(self, id: str) -> Optional[str]:
+        """
+        Get the index name containing the document with given ID.
+
+        Optimized to use direct get through alias instead of search query.
+        Falls back to search if direct get fails (for backward compatibility).
+
+        Args:
+            id: Document ID to find
+
+        Returns:
+            Index name containing the document, or None if not found
+        """
+        # Try direct get through alias first (more efficient)
+        try:
+            # Use _source: false and stored_fields to avoid loading document data
+            response = self.es.get(
+                id=id,
+                index=self.alias_name,
+                _source=False,
+                stored_fields="_none_"
+            )
+            return response.get('_index')
+        except elasticsearch.exceptions.NotFoundError:
+            return None
+        except elasticsearch.exceptions.RequestError:
+            # Fallback to search if direct get fails (e.g., alias routing issues)
+            try:
+                search = self.es.search(
+                    index=self.alias_name,
+                    body={
+                        "query": {
+                            "term": {
+                                "_id": id
+                            }
+                        },
+                        "size": 1,
+                        "_source": False
+                    }
+                )
+                if search["hits"]["total"]["value"] == 1:
+                    return search["hits"]["hits"][0]["_index"]
+            except Exception as e:
+                self.logger.warning(
+                    "document.get_index.error",
+                    extra={
+                        "doc_id": id,
+                        "error": str(e)
+                    }
+                )
        return None
 
-    def _compress(self, data):
+    def _compress(self, data: bytes) -> str:
+        """
+        Compress and encode data for storage.
+
+        Args:
+            data: Raw bytes to compress
+
+        Returns:
+            Base64-encoded compressed string
+        """
+        if not isinstance(data, bytes):
+            raise TypeError(f"data must be bytes, got {type(data)}")
        return base64.b64encode(zlib.compress(data)).decode(self.encoding)
 
-    def _decompress(self, data):
-        return zlib.decompress(base64.b64decode(data))
-
-    def delete(self, id):
+    def _decompress(self, data: str) -> bytes:
        """
-
+        Decompress and decode data from storage.
+
+        Args:
+            data: Base64-encoded compressed string
+
+        Returns:
+            Decompressed bytes
        """
+        if not isinstance(data, str):
+            raise TypeError(f"data must be str, got {type(data)}")
+        try:
+            return zlib.decompress(base64.b64decode(data))
+        except (ValueError, zlib.error) as e:
+            raise ValueError(f"Failed to decompress data: {e}") from e
 
+    def delete(self, id: str) -> None:
+        """
+        Delete a node by ID.
+
+        Args:
+            id: Document ID to delete
+
+        Example:
+            >>> nodestore.delete('key1')
+        """
+        if not id:
+            raise ValueError("id cannot be empty")
+
        try:
+            # Use direct delete instead of delete_by_query for better performance
+            index = self._get_read_index(id)
+            if index:
+                self.es.delete(id=id, index=index, refresh=self.refresh)
+            else:
+                # Fallback to delete_by_query if index not found
+                self.es.delete_by_query(
+                    index=self.alias_name,
+                    query={
+                        "term": {
+                            "_id": id
+                        }
+                    }
+                )
            self.logger.info(
                "document.delete.executed",
                extra={
                    "doc_id": id
                }
            )
-            self.es.delete_by_query(
-                index=self.alias_name,
-                query = {
-                    "term": {
-                        "_id": id
-                    }
-                }
-            )
        except elasticsearch.exceptions.NotFoundError:
+            # Document doesn't exist, which is fine
            pass
        except elasticsearch.exceptions.ConflictError:
+            # Concurrent deletion, which is fine
            pass
+        except Exception as e:
+            self.logger.error(
+                "document.delete.error",
+                extra={
+                    "doc_id": id,
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
 
-    def delete_multi(self, id_list):
+    def delete_multi(self, id_list: List[str]) -> None:
        """
        Delete multiple nodes.
+
        Note: This is not guaranteed to be atomic and may result in a partial
        delete.
-
+
+        Args:
+            id_list: List of document IDs to delete
+
+        Example:
+            >>> delete_multi(['key1', 'key2'])
        """
-
+        if not id_list:
+            return
+
+        if not isinstance(id_list, list):
+            raise TypeError(f"id_list must be a list, got {type(id_list)}")
+
        try:
            response = self.es.delete_by_query(
                index=self.alias_name,
-                query
+                query={
                    "ids": {
                        "values": id_list
                    }
-                }
+                },
+                refresh=self.refresh
            )
            self.logger.info(
                "document.delete_multi.executed",
                extra={
                    "docs_to_delete": len(id_list),
-                    "docs_deleted": response
+                    "docs_deleted": response.get("deleted", 0)
                }
            )
        except elasticsearch.exceptions.NotFoundError:
+            # Indices don't exist, which is fine
            pass
        except elasticsearch.exceptions.ConflictError:
+            # Concurrent deletion, which is fine
            pass
+        except Exception as e:
+            self.logger.error(
+                "document.delete_multi.error",
+                extra={
+                    "docs_to_delete": len(id_list),
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
 
 
-    def _get_bytes(self, id):
+    def _get_bytes(self, id: str) -> Optional[bytes]:
        """
-
-
+        Get raw bytes for a node by ID.
+
+        Args:
+            id: Document ID to retrieve
+
+        Returns:
+            Decompressed bytes, or None if not found
+
+        Example:
+            >>> nodestore._get_bytes('key1')
+            b'{"message": "hello world"}'
        """
+        if not id:
+            return None
+
        index = self._get_read_index(id)
 
        if index is not None:
            try:
                response = self.es.get(id=id, index=index, stored_fields=["data"])
+                if 'fields' in response and 'data' in response['fields']:
+                    return self._decompress(response['fields']['data'][0])
+                else:
+                    self.logger.warning(
+                        "document.get.warning",
+                        extra={
+                            "doc_id": id,
+                            "index": index,
+                            "error": "data field not found in response"
+                        }
+                    )
+                    return None
            except elasticsearch.exceptions.NotFoundError:
                return None
-
-
+            except Exception as e:
+                self.logger.error(
+                    "document.get.error",
+                    extra={
+                        "doc_id": id,
+                        "index": index,
+                        "error": str(e)
+                    },
+                    exc_info=True
+                )
+                return None
        else:
            self.logger.warning(
                "document.get.warning",
@@ -198,44 +393,142 @@ class ElasticNodeStorage(NodeStorage):
        return None
 
 
-    def _set_bytes(self, id, data, ttl=None):
+    def _set_bytes(self, id: str, data: bytes, ttl: Optional[int] = None) -> None:
        """
-
+        Set raw bytes for a node by ID.
+
+        Args:
+            id: Document ID
+            data: Raw bytes to store
+            ttl: Time to live in seconds (not currently used, reserved for future use)
+
+        Example:
+            >>> nodestore._set_bytes('key1', b"{'foo': 'bar'}")
        """
+        if not id:
+            raise ValueError("id cannot be empty")
+
+        if not isinstance(data, bytes):
+            raise TypeError(f"data must be bytes, got {type(data)}")
+
        index = self._get_write_index()
-
-
-
-
-
-
+        try:
+            self.es.index(
+                id=id,
+                index=index,
+                document={
+                    'data': self._compress(data),
+                    'timestamp': datetime.now(timezone.utc).isoformat()
+                },
+                refresh=self.refresh,
+            )
+        except Exception as e:
+            self.logger.error(
+                "document.set.error",
+                extra={
+                    "doc_id": id,
+                    "index": index,
+                    "error": str(e)
+                },
+                exc_info=True
+            )
+            raise
 
-    def cleanup(self, cutoff: datetime):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
+    def cleanup(self, cutoff: datetime) -> None:
+        """
+        Clean up indices older than the cutoff date.
+
+        Args:
+            cutoff: Datetime threshold - indices older than this will be deleted
+        """
+        if not isinstance(cutoff, datetime):
+            raise TypeError(f"cutoff must be a datetime object, got {type(cutoff)}")
+
+        # Ensure cutoff is timezone-aware
+        if cutoff.tzinfo is None:
+            cutoff = cutoff.replace(tzinfo=timezone.utc)
+
+        try:
+            alias_indices = self.es.indices.get_alias(index=self.alias_name)
+        except elasticsearch.exceptions.NotFoundError:
+            self.logger.warning(
+                "cleanup.alias.not_found",
+                extra={
+                    "alias": self.alias_name
+                }
+            )
+            return
+
+        deleted_count = 0
+        skipped_count = 0
+
+        for index in alias_indices:
+            # Parse date from index name using regex for more robust parsing
+            # Handles indices with postfixes like '-fixed' or '-reindex'
+            match = self.INDEX_DATE_PATTERN.match(index)
+            if not match:
+                self.logger.warning(
+                    "cleanup.index.skip",
+                    extra={
+                        "index": index,
+                        "reason": "index name does not match expected pattern"
+                    }
+                )
+                skipped_count += 1
+                continue
+
+            try:
+                index_date_str = match.group(1)
+                index_ts = datetime.strptime(index_date_str, "%Y-%m-%d").replace(
+                    tzinfo=timezone.utc
+                )
+
+                if index_ts < cutoff:
+                    try:
+                        self.es.indices.delete(index=index)
+                        deleted_count += 1
+                        self.logger.info(
+                            "index.delete.executed",
+                            extra={
+                                "index": index,
+                                "index_ts": index_ts.timestamp(),
+                                "cutoff_ts": cutoff.timestamp(),
+                                "status": "deleted"
+                            }
+                        )
+                    except elasticsearch.exceptions.NotFoundError:
+                        self.logger.info(
+                            "index.delete.error",
+                            extra={
+                                "index": index,
+                                "error": "not found"
+                            }
+                        )
+                    except Exception as e:
+                        self.logger.error(
+                            "index.delete.error",
+                            extra={
+                                "index": index,
+                                "error": str(e)
+                            },
+                            exc_info=True
+                        )
+            except ValueError as e:
+                self.logger.warning(
+                    "cleanup.index.skip",
+                    extra={
+                        "index": index,
+                        "reason": f"failed to parse date: {e}"
+                    }
+                )
+                skipped_count += 1
+
+        self.logger.info(
+            "cleanup.completed",
+            extra={
+                "cutoff_ts": cutoff.timestamp(),
+                "deleted_count": deleted_count,
+                "skipped_count": skipped_count,
+                "total_checked": len(alias_indices)
+            }
+        )
{sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: sentry-nodestore-elastic
-Version: 1.0.1
+Version: 1.1.0
 Summary: Sentry nodestore Elasticsearch backend
 Home-page: https://github.com/andrsp/sentry-nodestore-elastic
 Author: andrsp@gmail.com
@@ -12,7 +12,6 @@ Project-URL: Source Code, https://github.com/andrsp/sentry-nodestore-elastic
 Keywords: sentry,elasticsearch,nodestore,backend
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: POSIX
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
@@ -20,8 +19,20 @@ Classifier: Programming Language :: Python
 Classifier: Operating System :: OS Independent
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sentry
-Requires-Dist: elasticsearch
+Requires-Dist: sentry<27.0.0,>=26.1.0
+Requires-Dist: elasticsearch<9.0.0,>=8.0.0
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: license-file
+Dynamic: project-url
+Dynamic: requires-dist
+Dynamic: summary
 
 # sentry-nodestore-elastic
 
@@ -29,7 +40,7 @@ Sentry nodestore Elasticsearch backend
 
 [](https://pypi.python.org/pypi/sentry-nodestore-elastic)
 
-Supported Sentry
+Supported Sentry 26.1.0+ & elasticsearch 8.x versions
 
 Use Elasticsearch cluster for store node objects from Sentry
 
@@ -46,7 +57,7 @@ Switching nodestore to dedicated Elasticsearch cluster provides more scalability
 Rebuild sentry docker image with nodestore package installation
 
 ``` shell
-FROM getsentry/sentry:
+FROM getsentry/sentry:26.1.0
 RUN pip install sentry-nodestore-elastic
 ```
 
@@ -195,7 +206,7 @@ while True:
 
     bulk(es, bulk_data)
     count = count - 2000
-    print(f"
+    print(f"Remaining rows: {count}")
 
 cursor.close()
 conn.close()
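The README excerpt above covers the image rebuild and data migration; wiring the backend into Sentry itself goes through the standard nodestore settings. A minimal sketch of a `sentry.conf.py` fragment, assuming the package exposes `ElasticNodeStorage` at its top level (per the RECORD entries below); the endpoint, credentials, and TLS flag are placeholders:

```python
# sentry.conf.py -- minimal sketch; endpoint and credentials are placeholders
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "https://elasticsearch:9200",
    basic_auth=("sentry", "<password>"),  # adjust to your cluster's auth
    verify_certs=False,                   # enable certificate verification in production
)

SENTRY_NODESTORE = "sentry_nodestore_elastic.ElasticNodeStorage"
SENTRY_NODESTORE_OPTIONS = {
    "es": es,
    "refresh": False,  # backend default; avoids a per-write index refresh
}
```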
sentry_nodestore_elastic-1.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+sentry_nodestore_elastic/__init__.py,sha256=vU-X62MDmPtTKab1xRiCrZl2MwOsbPX0kSXpGV7hAHk,64
+sentry_nodestore_elastic/backend.py,sha256=GJDrmf2wILJJGLXtU1UJpOcuiuStmchYYOCOPulRJIc,18294
+sentry_nodestore_elastic-1.1.0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+sentry_nodestore_elastic-1.1.0.dist-info/METADATA,sha256=zoJd9BNYcraXaVfiO7PVm3kUe8Kg_feRxl96lccwLck,6230
+sentry_nodestore_elastic-1.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+sentry_nodestore_elastic-1.1.0.dist-info/top_level.txt,sha256=PFv5ZH9Um8naXLk3uknqoowcfN-K8jOpI98smdVpSWQ,25
+sentry_nodestore_elastic-1.1.0.dist-info/RECORD,,
sentry_nodestore_elastic-1.0.1.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-sentry_nodestore_elastic/__init__.py,sha256=vU-X62MDmPtTKab1xRiCrZl2MwOsbPX0kSXpGV7hAHk,64
-sentry_nodestore_elastic/backend.py,sha256=e48_3CQdBs46YqJ3oHBsRwRe3JBjLA5tiNY75XrweQs,7676
-sentry_nodestore_elastic-1.0.1.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-sentry_nodestore_elastic-1.0.1.dist-info/METADATA,sha256=5iB-UfnBHEmX3CFLntM6tGKiEejxrHTlLPQV64SgOi0,6022
-sentry_nodestore_elastic-1.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sentry_nodestore_elastic-1.0.1.dist-info/top_level.txt,sha256=PFv5ZH9Um8naXLk3uknqoowcfN-K8jOpI98smdVpSWQ,25
-sentry_nodestore_elastic-1.0.1.dist-info/RECORD,,

{sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info/licenses}/LICENSE
RENAMED
File without changes
{sentry_nodestore_elastic-1.0.1.dist-info → sentry_nodestore_elastic-1.1.0.dist-info}/top_level.txt
RENAMED
File without changes
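
Since the new `cleanup()` method deletes whole date-based `sentry-YYYY-MM-DD` indices older than a cutoff, a scheduled job can call it directly. A minimal sketch, assuming a configured `ElasticNodeStorage` instance named `nodestore` and a hypothetical 30-day retention window:

```python
from datetime import datetime, timedelta, timezone

# Drop date-based indices older than 30 days (retention window is an example choice)
cutoff = datetime.now(timezone.utc) - timedelta(days=30)
nodestore.cleanup(cutoff)
```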