pybgpkitstream 0.1.3__tar.gz → 0.1.5__tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pybgpkitstream
-Version: 0.1.3
+Version: 0.1.5
 Summary: Drop-in replacement for PyBGPStream using BGPKIT
 Author: JustinLoye
 Author-email: JustinLoye <jloye@iij.ad.jp>
@@ -1,6 +1,6 @@
 [project]
 name = "pybgpkitstream"
-version = "0.1.3"
+version = "0.1.5"
 description = "Drop-in replacement for PyBGPStream using BGPKIT"
 readme = "README.md"
 authors = [
@@ -9,6 +9,7 @@ from heapq import merge
 from operator import itemgetter
 import binascii
 import logging
+from tempfile import TemporaryDirectory
 
 import aiohttp
 import bgpkit
@@ -44,6 +45,17 @@ def crc32(input_str: str):
     return f"{crc:08x}"
 
 
+class Directory:
+    """Permanent directory that mimics TemporaryDirectory interface."""
+
+    def __init__(self, path):
+        self.name = str(path)
+
+    def cleanup(self):
+        """No-op cleanup for permanent directories."""
+        pass
+
+
 class BGPKITStream:
     def __init__(
         self,
@@ -60,7 +72,9 @@ class BGPKITStream:
         self.ts_end = ts_end
         self.collector_id = collector_id
         self.data_type = data_type
-        self.cache_dir = cache_dir
+        self.cache_dir: Directory | TemporaryDirectory = (
+            Directory(cache_dir) if cache_dir else TemporaryDirectory()
+        )
         self.filters = filters
         self.max_concurrent_downloads = max_concurrent_downloads
         self.chunk_time = chunk_time
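Note on the two hunks above: cache_dir is now always an object exposing .name and .cleanup(), either the new Directory shim wrapping a user-supplied path or a TemporaryDirectory created on the fly. A standalone sketch of that pattern (the path below is a placeholder, not taken from the package):

    from tempfile import TemporaryDirectory

    class Directory:
        """Permanent directory mimicking the TemporaryDirectory interface."""
        def __init__(self, path):
            self.name = str(path)

        def cleanup(self):
            pass  # never delete a user-supplied cache

    cache_dir = "/data/mrt-cache"  # placeholder user-supplied path
    cache = Directory(cache_dir) if cache_dir else TemporaryDirectory()
    print(cache.name)  # either variant can be joined with filenames via os.path.join
    cache.cleanup()    # safe to call unconditionally, e.g. in a finally block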
@@ -72,7 +86,6 @@ class BGPKITStream:
         """Generate a cache filename compatible with BGPKIT parser."""
 
         hash_suffix = crc32(url)
-        print(url)
 
         if "updates." in url:
             data_type = "updates"
@@ -103,9 +116,8 @@ class BGPKITStream:
         self.urls = {"rib": defaultdict(list), "update": defaultdict(list)}
         for data_type in self.data_type:
             items: list[BrokerItem] = self.broker.query(
-                ts_start=datetime.datetime.fromtimestamp(self.ts_start)
-                - datetime.timedelta(minutes=1),
-                ts_end=datetime.datetime.fromtimestamp(self.ts_end),
+                ts_start=int(self.ts_start - 60),
+                ts_end=int(self.ts_end),
                 collector_id=self.collector_id,
                 data_type=data_type,
             )
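The broker query now passes plain Unix timestamps (padded by 60 s at the start) instead of naive datetime objects built with fromtimestamp(), which removes the dependence on the local timezone. A hedged standalone sketch of the equivalent call, assuming the default bgpkit.Broker() constructor and using placeholder values for the window and collector:

    import bgpkit

    ts_start, ts_end = 1735689600, 1735693200  # placeholder epoch seconds
    broker = bgpkit.Broker()
    items = broker.query(
        ts_start=int(ts_start - 60),  # pad one minute before the window start
        ts_end=int(ts_end),
        collector_id="rrc00",         # placeholder collector
        data_type="rib",
    )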
@@ -142,7 +154,7 @@ class BGPKITStream:
        for rc, rc_urls in self.urls[data_type].items():
            for url in rc_urls:
                filename = self._generate_cache_filename(url)
-               filepath = os.path.join(self.cache_dir, filename)
+               filepath = os.path.join(self.cache_dir.name, filename)
 
                if os.path.exists(filepath):
                    logging.debug(f"{filepath} is a cache hit")
@@ -173,76 +185,79 @@ class BGPKITStream:
         return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)
 
     def __iter__(self) -> Iterator[BGPElement]:
-        # Manager mode: spawn smaller worker streams to balance fetch/parse
-        if self.chunk_time:
-            current = self.ts_start
-
-            while current < self.ts_end:
-                chunk_end = min(current + self.chunk_time, self.ts_end)
-
-                logging.info(
-                    f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
-                    f"to {datetime.datetime.fromtimestamp(chunk_end)}"
-                )
-
-                worker = type(self)(
-                    ts_start=current,
-                    ts_end=chunk_end
-                    - 1,  # remove one second because BGPKIT include border
-                    collector_id=self.collector_id,
-                    data_type=self.data_type,
-                    cache_dir=self.cache_dir,
-                    filters=self.filters,
-                    max_concurrent_downloads=self.max_concurrent_downloads,
-                    chunk_time=None,  # Worker doesn't chunk itself
-                )
-
-                yield from worker
-                current = chunk_end + 1e-7
+        # try/finally to cleanup the fetching cache
+        try:
+            # Manager mode: spawn smaller worker streams to balance fetch/parse
+            if self.chunk_time:
+                current = self.ts_start
+
+                while current < self.ts_end:
+                    chunk_end = min(current + self.chunk_time, self.ts_end)
+
+                    logging.info(
+                        f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
+                        f"to {datetime.datetime.fromtimestamp(chunk_end)}"
+                    )
+
+                    worker = type(self)(
+                        ts_start=current,
+                        ts_end=chunk_end
+                        - 1,  # remove one second because BGPKIT include border
+                        collector_id=self.collector_id,
+                        data_type=self.data_type,
+                        cache_dir=None,
+                        filters=self.filters,
+                        max_concurrent_downloads=self.max_concurrent_downloads,
+                        chunk_time=None,  # Worker doesn't chunk itself
+                    )
+
+                    yield from worker
+                    current = chunk_end + 1e-7
+
+                return
+
+            self._set_urls()
 
-            return
-
-        self._set_urls()
-
-        if self.cache_dir:
             asyncio.run(self._prefetch_data())
 
-        # One iterator for each data_type * collector combinations
-        # To be merged according to the elements timestamp
-        iterators_to_merge = []
+            # One iterator for each data_type * collector combinations
+            # To be merged according to the elements timestamp
+            iterators_to_merge = []
 
-        for data_type in self.data_type:
-            is_rib = data_type == "rib"
-
-            # Get rib or update files per collector
-            if self.cache_dir:
-                rc_to_urls = self.paths[data_type]
-            else:
-                rc_to_urls = self.urls[data_type]
-
-            # Chain rib or update iterators to get one stream per collector / data_type
-            for rc, urls in rc_to_urls.items():
-                parsers = [bgpkit.Parser(url=url, filters=self.filters) for url in urls]
-
-                chained_iterator = chain.from_iterable(parsers)
-
-                # Add metadata lost by bgpkit for compatibility with pubgpstream
-                iterators_to_merge.append((chained_iterator, is_rib, rc))
-
-        # Make a generator to tag each bgpkit element with metadata
-        # Benefit 1: full compat with pybgpstream
-        # Benefit 2: we give a key easy to access for heapq to merge
-        tagged_iterators = [
-            self._create_tagged_iterator(it, is_rib, rc)
-            for it, is_rib, rc in iterators_to_merge
-        ]
-
-        # Merge and convert to pybgpstream format
-        for timestamp, bgpkit_elem, is_rib, rc in merge(
-            *tagged_iterators, key=itemgetter(0)
-        ):
-            if self.ts_start <= timestamp <= self.ts_end:
-                yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+            for data_type in self.data_type:
+                is_rib = data_type == "rib"
+
+                # Get rib or update files per collector
+                rc_to_paths = self.paths[data_type]
+
+                # Chain rib or update iterators to get one stream per collector / data_type
+                for rc, paths in rc_to_paths.items():
+                    parsers = [
+                        bgpkit.Parser(url=path, filters=self.filters) for path in paths
+                    ]
+
+                    chained_iterator = chain.from_iterable(parsers)
+
+                    # Add metadata lost by bgpkit for compatibility with pubgpstream
+                    iterators_to_merge.append((chained_iterator, is_rib, rc))
+
+            # Make a generator to tag each bgpkit element with metadata
+            # Benefit 1: full compat with pybgpstream
+            # Benefit 2: we give a key easy to access for heapq to merge
+            tagged_iterators = [
+                self._create_tagged_iterator(it, is_rib, rc)
+                for it, is_rib, rc in iterators_to_merge
+            ]
+
+            # Merge and convert to pybgpstream format
+            for timestamp, bgpkit_elem, is_rib, rc in merge(
+                *tagged_iterators, key=itemgetter(0)
+            ):
+                if self.ts_start <= timestamp <= self.ts_end:
+                    yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+
+        finally:
+            self.cache_dir.cleanup()
 
     @classmethod
     def from_config(cls, config: BGPStreamConfig):
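Taken together, __iter__ now wraps the whole stream in try/finally so the download cache (temporary unless a cache_dir was supplied) is cleaned up even if the consumer stops early, and manager-mode workers are created with cache_dir=None so each chunk gets its own temporary directory. An illustrative way to consume the stream; every argument value below is a placeholder, not something taken from this diff:

    stream = BGPKITStream(
        ts_start=1735689600,            # placeholder epoch seconds
        ts_end=1735696800,
        collector_id="rrc00",           # placeholder collector
        data_type=["update"],
        cache_dir=None,                 # None -> per-run TemporaryDirectory
        filters={"peer_asn": "3333"},   # placeholder BGPKIT filter
        max_concurrent_downloads=4,
        chunk_time=3600,                # manager mode: one-hour worker chunks
    )

    n_elems = sum(1 for _ in stream)    # cache is removed once iteration finishes
    print(n_elems)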
@@ -1,5 +1,5 @@
 import datetime
-from pydantic import BaseModel, Field, DirectoryPath
+from pydantic import BaseModel, Field, DirectoryPath, field_validator
 from typing import Literal
 from ipaddress import IPv4Address, IPv6Address
 
@@ -70,3 +70,13 @@ class BGPStreamConfig(BaseModel):
         default=datetime.timedelta(hours=2),
         description="Interval for the fetch/parse cycle (avoid long prefetch time)",
     )
+
+    @field_validator("start_time", "end_time")
+    @classmethod
+    def normalize_to_utc(cls, dt: datetime.datetime) -> datetime.datetime:
+        # if naive datetime (not timezone-aware) assume it's UTC
+        if dt.tzinfo is None:
+            return dt.replace(tzinfo=datetime.timezone.utc)
+        # if timezone-aware, convert to utc
+        else:
+            return dt.astimezone(datetime.timezone.utc)
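The new validator pins start_time and end_time to UTC: a naive datetime is assumed to already be UTC, while an aware one is converted. The effect, shown standalone with placeholder datetimes rather than through the config model:

    import datetime

    naive = datetime.datetime(2025, 1, 1, 0, 0)          # no tzinfo: assumed UTC
    aware = naive.replace(tzinfo=datetime.timezone(datetime.timedelta(hours=9)))

    print(naive.replace(tzinfo=datetime.timezone.utc))   # 2025-01-01 00:00:00+00:00
    print(aware.astimezone(datetime.timezone.utc))       # 2024-12-31 15:00:00+00:00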