pybgpkitstream 0.1.6__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/PKG-INFO +1 -1
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/pyproject.toml +1 -1
- pybgpkitstream-0.2.0/src/pybgpkitstream/__init__.py +4 -0
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/src/pybgpkitstream/bgpkitstream.py +157 -51
- pybgpkitstream-0.2.0/src/pybgpkitstream/bgpparser.py +430 -0
- pybgpkitstream-0.2.0/src/pybgpkitstream/bgpstreamconfig.py +160 -0
- pybgpkitstream-0.2.0/src/pybgpkitstream/utils.py +11 -0
- pybgpkitstream-0.1.6/src/pybgpkitstream/__init__.py +0 -4
- pybgpkitstream-0.1.6/src/pybgpkitstream/bgpstreamconfig.py +0 -82
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/README.md +0 -0
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/src/pybgpkitstream/bgpelement.py +0 -0
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/src/pybgpkitstream/cli.py +0 -0
- {pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/src/pybgpkitstream/py.typed +0 -0
{pybgpkitstream-0.1.6 → pybgpkitstream-0.2.0}/src/pybgpkitstream/bgpkitstream.py (+157 -51):

```diff
@@ -6,7 +6,7 @@ from typing import Iterator, Literal
 from collections import defaultdict
 from itertools import chain
 from heapq import merge
-from operator import itemgetter
+from operator import attrgetter, itemgetter
 import binascii
 import logging
 from tempfile import TemporaryDirectory
@@ -15,8 +15,27 @@ import aiohttp
 import bgpkit
 from bgpkit.bgpkit_broker import BrokerItem
 
-from pybgpkitstream.bgpstreamconfig import
+from pybgpkitstream.bgpstreamconfig import (
+    BGPStreamConfig,
+    FilterOptions,
+    PyBGPKITStreamConfig,
+)
 from pybgpkitstream.bgpelement import BGPElement
+from pybgpkitstream.bgpparser import (
+    BGPParser,
+    PyBGPKITParser,
+    BGPKITParser,
+    PyBGPStreamParser,
+    BGPdumpParser,
+)
+from pybgpkitstream.utils import dt_from_filepath
+
+name2parser = {
+    "pybgpkit": PyBGPKITParser,
+    "bgpkit": BGPKITParser,
+    "pybgpstream": PyBGPStreamParser,
+    "bgpdump": BGPdumpParser,
+}
 
 
 logger = logging.getLogger(__name__)
@@ -72,25 +91,40 @@ class BGPKITStream:
         ts_end: float,
         collector_id: str,
         data_type: list[Literal["update", "rib"]],
-
-
-        max_concurrent_downloads: int = 10,
+        filters: FilterOptions | None,
+        cache_dir: str | None = None,
+        max_concurrent_downloads: int | None = 10,
         chunk_time: float | None = datetime.timedelta(hours=2).seconds,
+        ram_fetch: bool | None = True,
+        parser_name: str | None = "pybgpkit",
     ):
+        # Stream config
         self.ts_start = ts_start
         self.ts_end = ts_end
         self.collector_id = collector_id
         self.data_type = data_type
-
-
-            if cache_dir
-            else TemporaryDirectory(dir=get_shared_memory())
-        )
+        if not filters:
+            filters = FilterOptions()
         self.filters = filters
+
+        # Implementation config
         self.max_concurrent_downloads = max_concurrent_downloads
         self.chunk_time = chunk_time
+        self.ram_fetch = ram_fetch
+        if cache_dir:
+            self.cache_dir = Directory(cache_dir)
+        else:
+            if ram_fetch:
+                self.cache_dir = TemporaryDirectory(dir=get_shared_memory())
+            else:
+                self.cache_dir = TemporaryDirectory()
+        if not parser_name:
+            self.parser_name = "pybgpkit"
+        else:
+            self.parser_name = parser_name
 
         self.broker = bgpkit.Broker()
+        self.parser_cls: BGPParser = name2parser[parser_name]
 
     @staticmethod
     def _generate_cache_filename(url):
@@ -191,11 +225,14 @@
             self.paths[data_type][rc].append(filepath)
         logging.info("All downloads finished.")
 
-    def
-        ""
-
+    def __iter__(self):
+        if "update" in self.data_type:
+            return self._iter_update()
+        else:
+            return self._iter_rib()
 
-    def
+    def _iter_update(self) -> Iterator[BGPElement]:
+        # __iter__ for data types [ribs, updates] or [updates]
         # try/finally to cleanup the fetching cache
         try:
             # Manager mode: spawn smaller worker streams to balance fetch/parse
@@ -209,17 +246,20 @@
                         f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
                         f"to {datetime.datetime.fromtimestamp(chunk_end)}"
                     )
-
                     worker = type(self)(
                         ts_start=current,
                         ts_end=chunk_end
                         - 1,  # remove one second because BGPKIT include border
                         collector_id=self.collector_id,
                         data_type=self.data_type,
-                        cache_dir=
+                        cache_dir=self.cache_dir.name
+                        if isinstance(self.cache_dir, Directory)
+                        else None,
                         filters=self.filters,
                         max_concurrent_downloads=self.max_concurrent_downloads,
                         chunk_time=None,  # Worker doesn't chunk itself
+                        ram_fetch=self.ram_fetch,
+                        parser_name=self.parser_name,
                     )
 
                     yield from worker
@@ -228,7 +268,6 @@
                 return
 
             self._set_urls()
-
             asyncio.run(self._prefetch_data())
 
             # One iterator for each data_type * collector combinations
@@ -243,48 +282,115 @@
 
             # Chain rib or update iterators to get one stream per collector / data_type
             for rc, paths in rc_to_paths.items():
+                # Don't use a generator here. parsers are lazy anyway
                 parsers = [
-
+                    self.parser_cls(path, is_rib, rc, filters=self.filters)
+                    for path in paths
                 ]
 
                 chained_iterator = chain.from_iterable(parsers)
 
                 # Add metadata lost by bgpkit for compatibility with pubgpstream
-                iterators_to_merge.append((chained_iterator, is_rib, rc))
-
-            # Make a generator to tag each bgpkit element with metadata
-            # Benefit 1: full compat with pybgpstream
-            # Benefit 2: we give a key easy to access for heapq to merge
-            tagged_iterators = [
-                self._create_tagged_iterator(it, is_rib, rc)
-                for it, is_rib, rc in iterators_to_merge
-            ]
-
-            # Merge and convert to pybgpstream format
-            for timestamp, bgpkit_elem, is_rib, rc in merge(
-                *tagged_iterators, key=itemgetter(0)
-            ):
-                if self.ts_start <= timestamp <= self.ts_end:
-                    yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+                # iterators_to_merge.append((chained_iterator, is_rib, rc))
+                iterators_to_merge.append(chained_iterator)
 
+            for bgpelem in merge(*iterators_to_merge, key=attrgetter("time")):
+                if self.ts_start <= bgpelem.time <= self.ts_end:
+                    yield bgpelem
+        finally:
+            self.cache_dir.cleanup()
+
+    def _iter_rib(self) -> Iterator[BGPElement]:
+        # __iter__ for data types [ribs]
+        # try/finally to cleanup the fetching cache
+        try:
+            # Manager mode: spawn smaller worker streams to balance fetch/parse
+            if self.chunk_time:
+                current = self.ts_start
+
+                while current < self.ts_end:
+                    chunk_end = min(current + self.chunk_time, self.ts_end)
+
+                    logging.info(
+                        f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
+                        f"to {datetime.datetime.fromtimestamp(chunk_end)}"
+                    )
+                    worker = type(self)(
+                        ts_start=current,
+                        ts_end=chunk_end
+                        - 1,  # remove one second because BGPKIT include border
+                        collector_id=self.collector_id,
+                        data_type=self.data_type,
+                        cache_dir=self.cache_dir.name
+                        if isinstance(self.cache_dir, Directory)
+                        else None,
+                        filters=self.filters,
+                        max_concurrent_downloads=self.max_concurrent_downloads,
+                        chunk_time=None,  # Worker doesn't chunk itself
+                        ram_fetch=self.ram_fetch,
+                        parser_name=self.parser_name,
+                    )
+
+                    yield from worker
+                    current = chunk_end + 1e-7
+
+                return
+
+            self._set_urls()
+            asyncio.run(self._prefetch_data())
+
+            rc_to_paths = self.paths["rib"]
+
+            # Agglomerate all RIBs parsers for ordering
+            iterators_to_order = []
+            for rc, paths in rc_to_paths.items():
+                # Don't use a generator here. parsers are lazy anyway
+                parsers = [
+                    (
+                        dt_from_filepath(path),
+                        rc,
+                        self.parser_cls(path, True, rc, filters=self.filters),
+                    )
+                    for path in paths
+                ]
+                iterators_to_order.extend(parsers)
+
+            iterators_to_order.sort(key=itemgetter(0, 1))
+
+            for bgpelem in chain.from_iterable(
+                (iterator[2] for iterator in iterators_to_order)
+            ):
+                if self.ts_start <= bgpelem.time <= self.ts_end:
+                    yield bgpelem
         finally:
             self.cache_dir.cleanup()
 
     @classmethod
-    def from_config(cls, config: BGPStreamConfig):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def from_config(cls, config: PyBGPKITStreamConfig | BGPStreamConfig):
+        if isinstance(config, PyBGPKITStreamConfig):
+            stream_config = config.bgpstream_config
+            return cls(
+                ts_start=stream_config.start_time.timestamp(),
+                ts_end=stream_config.end_time.timestamp(),
+                collector_id=",".join(stream_config.collectors),
+                data_type=[dtype[:-1] for dtype in stream_config.data_types],
+                filters=stream_config.filters
+                if stream_config.filters
+                else FilterOptions(),
+                cache_dir=str(config.cache_dir) if config.cache_dir else None,
+                max_concurrent_downloads=config.max_concurrent_downloads
+                if config.max_concurrent_downloads
+                else 10,
+                chunk_time=config.chunk_time.seconds if config.chunk_time else None,
+                ram_fetch=config.ram_fetch if config.ram_fetch else None,
+                parser_name=config.parser if config.parser else "pybgpkit",
+            )
+
+        elif isinstance(config, BGPStreamConfig):
+            return cls(
+                ts_start=config.start_time.timestamp(),
+                ts_end=config.end_time.timestamp(),
+                collector_id=",".join(config.collectors),
+                data_type=[dtype[:-1] for dtype in config.data_types],
+                filters=config.filters if config.filters else FilterOptions(),
+            )
```
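A minimal usage sketch of the reworked stream class (not part of the diff itself): the constructor now takes `filters`, `ram_fetch`, and a `parser_name` key from `name2parser`, and iteration dispatches to `_iter_update()` or `_iter_rib()`. The collector name, time window, and filter value below are illustrative, not taken from the package.

```python
import datetime

from pybgpkitstream.bgpkitstream import BGPKITStream
from pybgpkitstream.bgpstreamconfig import FilterOptions

start = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)
end = start + datetime.timedelta(hours=1)

stream = BGPKITStream(
    ts_start=start.timestamp(),
    ts_end=end.timestamp(),
    collector_id="rrc00",  # illustrative collector name
    data_type=["update"],
    filters=FilterOptions(peer_asn=3356),  # illustrative filter
    parser_name="pybgpkit",  # any key of name2parser
)

# __iter__ dispatches to _iter_update() because "update" is in data_type
for elem in stream:
    print(elem.time, elem.type, elem.fields["prefix"])
```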
pybgpkitstream-0.2.0/src/pybgpkitstream/bgpparser.py (new file, +430 -0):

```python
import bgpkit
from pybgpkitstream.bgpstreamconfig import FilterOptions
from pybgpkitstream.bgpelement import BGPElement
from typing import Iterator, Protocol
import re
import ipaddress
import subprocess as sp
from pybgpkitstream.utils import dt_from_filepath

try:
    import pybgpstream
except ImportError:
    pass


class BGPParser(Protocol):
    filepath: str
    is_rib: bool
    collector: str
    filters: FilterOptions

    def __iter__(self) -> Iterator[BGPElement]: ...


class PyBGPKITParser(BGPParser):
    """Use BGPKIT Python bindings (default parser). Slower than other alternatives but easier to ship (no system dependencies)."""

    def __init__(
        self,
        filepath: str,
        is_rib: bool,
        collector: str,
        filters: FilterOptions = FilterOptions(),
    ):
        self.filepath = filepath
        self.parser = None  # placeholder for lazy instantiation
        self.is_rib = is_rib
        self.collector = collector
        self.filters = filters.model_dump(exclude_unset=True)
        # cast int ipv to pybgpkit ipv4 or ipv6 string
        if "ip_version" in self.filters:
            ipv_int = self.filters["ip_version"]
            if ipv_int:
                self.filters["ip_version"] = f"ipv{ipv_int}"

    def _convert(self, element) -> BGPElement:
        return BGPElement(
            type="R" if self.is_rib else element.elem_type,
            collector=self.collector,
            time=element.timestamp,
            peer_asn=element.peer_asn,
            peer_address=element.peer_ip,
            fields={
                "next-hop": element.next_hop,
                "as-path": element.as_path,
                "communities": [] if not element.communities else element.communities,
                "prefix": element.prefix,
            },
        )

    def __iter__(self) -> Iterator[BGPElement]:
        parser = bgpkit.Parser(self.filepath, filters=self.filters)
        for elem in parser:
            yield self._convert(elem)


class BGPKITParser(BGPParser):
    """Run BGPKIT's CLI `bgpkit-parser` as a subprocess."""

    def __init__(
        self,
        filepath: str,
        is_rib: bool,
        collector: str,
        filters: FilterOptions | str | None = None,
    ):
        self.filepath = filepath
        self.parser = None  # placeholder for lazy instantiation
        self.is_rib = is_rib
        self.collector = collector
        self.filters = filters

        # Set timestamp for the same behavior as bgpdump default (timestamp match rib time, not last change)
        self.time = int(dt_from_filepath(self.filepath).timestamp())

    def __iter__(self):
        cmd = build_bgpkit_cmd(self.filepath, self.filters)
        self.parser = sp.Popen(cmd, stdout=sp.PIPE, text=True, bufsize=1)

        stream = (self._convert(line) for line in self.parser.stdout)

        try:
            yield from stream
        finally:
            # Cleanup happens whether exhausted or abandoned
            self.parser.stdout.close()
            self.parser.terminate()
            self.parser.wait()  # Reap the zombie process

    def _convert(self, element: str):
        element = element.rstrip().split("|")
        rec_type = element[0]

        # 1. Handle Withdrawals (W)
        # Structure: Type|Time|PeerIP|PeerAS|Prefix
        if rec_type == "W":
            return BGPElement(
                type="W",
                collector=self.collector,
                time=self.time,  # force RIB filename timestamp instead of last changed
                peer_asn=int(element[3]),
                peer_address=element[2],
                fields={"prefix": element[4]},
            )

        # 2. Handle Announcements (A)
        # Structure: Type|Time|PeerIP|PeerAS|Prefix|ASPath|Origin|NextHop|...|Communities|...
        # bgpkit-parser index mapping:
        # 0: Type, 1: Time, 2: PeerIP, 3: PeerAS, 4: Prefix,
        # 5: ASPath, 7: NextHop, 10: Communities

        rec_comm = element[10]

        return BGPElement(
            # bgpkit outputs 'A' for both Updates and RIB entries.
            # Map to "A" (Announcement) or change to "R" if you strictly need RIB typing.
            "R" if self.is_rib else rec_type,
            self.collector,
            # float(element[1]),
            self.time,
            int(element[3]),
            element[2],
            {
                "prefix": element[4],
                "as-path": element[5],
                "next-hop": element[7],
                # Fast check for empty communities
                "communities": rec_comm.split() if rec_comm else [],
            },
        )


class PyBGPStreamParser(BGPParser):
    """Use pybgpstream as an MRT parser with the `singlefile` data interface"""

    def __init__(
        self,
        filepath: str,
        is_rib: bool,
        collector: str,
        filters: FilterOptions,
        *args,
        **kwargs,
    ):
        self.filepath = filepath
        self.collector = collector
        self.filters = generate_bgpstream_filters(filters) if filters else None

    def __iter__(self):
        stream = pybgpstream.BGPStream(data_interface="singlefile", filter=self.filters)
        stream.set_data_interface_option("singlefile", "rib-file", self.filepath)

        for elem in stream:
            elem.collector = self.collector
            yield elem


class BGPdumpParser(BGPParser):
    """Run bgpdump as a subprocess. I might have over-engineered the filtering."""

    def __init__(self, filepath, is_rib, collector, filters):
        self.filepath = filepath
        self.collector = collector

        self._init_filters(filters)

    def __iter__(self):
        self.parser = sp.Popen(
            ["bgpdump", "-m", "-v", self.filepath], stdout=sp.PIPE, text=True, bufsize=1
        )

        try:
            raw_stream = (self._convert(line) for line in self.parser.stdout)
            # Filter STATE message
            clean_stream = (e for e in raw_stream if e is not None)

            if self._filter_func:
                yield from filter(self._filter_func, clean_stream)
            else:
                yield from clean_stream
        finally:
            # Cleanup happens whether exhausted or abandoned
            self.parser.stdout.close()
            self.parser.terminate()
            self.parser.wait()  # Reap the zombie process

    def _convert(self, element: str):
        # Extract type once to avoid repeated list lookups
        element = element.rstrip().split("|")
        elem_type = element[2]
        if elem_type == "STATE":
            return

        # 1. Handle Withdrawals (Fastest path, fewer fields)
        if elem_type == "W":
            return BGPElement(
                "W",
                self.collector,
                float(element[1]),
                int(element[4]),
                element[3],
                {"prefix": element[5]},  # Dict literal is faster than assignment
            )

        # 2. Handle RIB (TABLE_DUMP2) and Announcements (A)
        # Common vars
        rec_comm = element[11]

        # Logic: if TABLE_DUMP2, type is R, else A
        # Construct fields dict in one shot (BUILD_MAP opcode)
        return BGPElement(
            "R" if elem_type == "B" else "A",
            self.collector,
            float(element[1]),
            int(element[4]),
            element[3],
            {
                "prefix": element[5],
                "as-path": element[6],
                "next-hop": element[8],
                # Check for empty string before splitting (avoids creating [''])
                "communities": rec_comm.split() if rec_comm else [],
            },
        )

    def _init_filters(self, f: FilterOptions):
        # 1. Pre-process sets for O(1) lookups and compile Regex
        # self.peer_asns = set([f.peer_asn]) if f.peer_asn else (set(f.peer_ips) if f.peer_ips else None)
        if not f.model_dump(exclude_unset=True):
            self._filter_func = None

        self.peer_asn = f.peer_asn

        # Peer IPs (handles both single and list)
        self.peer_ips = None
        if f.peer_ip:
            self.peer_ips = {str(f.peer_ip)}
        elif f.peer_ips:
            self.peer_ips = {str(ip) for ip in f.peer_ips}

        self.origin_asn = str(f.origin_asn) if f.origin_asn else None
        self.update_type = (
            f.update_type[0].upper() if f.update_type else None
        )  # 'A' or 'W'
        self.ip_version = f.ip_version

        # Regex and CIDR objects
        self.as_path_re = re.compile(f.as_path) if f.as_path else None
        self.exact_net = ipaddress.ip_network(f.prefix) if f.prefix else None
        self.sub_net = ipaddress.ip_network(f.prefix_sub) if f.prefix_sub else None
        self.super_net = (
            ipaddress.ip_network(f.prefix_super) if f.prefix_super else None
        )
        self.ss_net = (
            ipaddress.ip_network(f.prefix_super_sub) if f.prefix_super_sub else None
        )

        # 2. Build the optimized filter function
        self._filter_func = self._compile_filter()

    def _compile_filter(self):
        # Localize variables to the closure to avoid 'self' lookups in the loop
        p_asn = self.peer_asn
        p_ips = self.peer_ips
        o_asn = self.origin_asn
        u_type = self.update_type
        version = self.ip_version
        path_re = self.as_path_re

        e_net = self.exact_net
        sub_n = self.sub_net
        sup_n = self.super_net
        ss_n = self.ss_net

        def filter_logic(e: BGPElement) -> bool:
            # 1. Cheap checks first (Integers and Strings)
            if p_asn is not None and int(e.peer_asn) != p_asn:
                return False
            if p_ips is not None and e.peer_address not in p_ips:
                return False
            if u_type is not None and e.type != u_type:
                return False

            # 2. String processing (Origin ASN and AS Path)
            # Use .get() or direct access depending on your confidence in 'fields' content
            as_path = e.fields.get("as-path", "")
            if o_asn is not None:
                if not as_path or as_path.rsplit(" ", 1)[-1] != o_asn:
                    return False
            if path_re is not None and not path_re.search(as_path):
                return False

            # 3. CIDR / IP Logic (Expensive)
            prefix_str = e.fields.get("prefix")
            if version is not None:
                # Fast check for IP version without parsing
                is_v6 = ":" in prefix_str if prefix_str else False
                if (version == 6 and not is_v6) or (version == 4 and is_v6):
                    return False

            if e_net or sub_n or sup_n or ss_n:
                if not prefix_str:
                    return False
                net = ipaddress.ip_network(prefix_str)
                if e_net and net != e_net:
                    return False
                if sub_n and not net.subnet_of(sub_n):
                    return False
                if sup_n and not net.supernet_of(sup_n):
                    return False
                if ss_n and not (net.subnet_of(ss_n) or net.supernet_of(ss_n)):
                    return False

            return True

        return filter_logic


def generate_bgpstream_filters(f: FilterOptions) -> str | None:
    """Generates a filter string compatible with BGPStream's C parser from a FilterOptions object."""
    if not f:
        return None
    if not f.model_dump(exclude_unset=True):
        return None

    parts = []

    if f.peer_asn:
        parts.append(f"peer {f.peer_asn}")

    if f.as_path:
        # Quote the value to handle potential spaces in the regex
        parts.append(f'aspath "{f.as_path}"')

    if f.origin_asn:
        # Filtering by origin ASN is typically done via an AS path regex
        parts.append(f'aspath "_{f.origin_asn}$"')

    if f.update_type:
        # The parser expects 'announcements' or 'withdrawals'
        value = "announcements" if f.update_type == "announce" else "withdrawals"
        parts.append(f"elemtype {value}")

    # Handle all prefix variations
    if f.prefix:
        parts.append(f"prefix exact {f.prefix}")
    if f.prefix_super:
        parts.append(f"prefix less {f.prefix_super}")
    if f.prefix_sub:
        parts.append(f"prefix more {f.prefix_sub}")
    if f.prefix_super_sub:
        parts.append(f"prefix any {f.prefix_super_sub}")

    if f.ip_version:
        parts.append(f"ipversion {f.ip_version[-1]}")

    # Warn about unsupported fields
    if f.peer_ip or f.peer_ips:
        print(
            "Warning: peer_ip and peer_ips are not supported by this BGPStream filter string parser and will be ignored."
        )

    # Join all parts with 'and' as required by the parser
    return " and ".join(parts)


def build_bgpkit_cmd(filepath: str, filters: FilterOptions) -> list[str]:
    # Start with the base command and file path
    cmd = ["bgpkit-parser", filepath]

    # 1. Simple Integer/String Mappings
    if filters.origin_asn:
        cmd.extend(["--origin-asn", str(filters.origin_asn)])

    if filters.peer_ip:
        cmd.extend(["--peer-ip", str(filters.peer_ip)])

    if filters.peer_asn:
        cmd.extend(["--peer-asn", str(filters.peer_asn)])

    if filters.as_path:
        cmd.extend(["--as-path", filters.as_path])

    # 2. Prefix Logic (Handling super/sub flags)
    # We prioritize the most specific prefix field provided
    prefix_val = None
    if filters.prefix:
        prefix_val = filters.prefix
    elif filters.prefix_super:
        prefix_val = filters.prefix_super
        cmd.append("--include-super")
    elif filters.prefix_sub:
        prefix_val = filters.prefix_sub
        cmd.append("--include-sub")
    elif filters.prefix_super_sub:
        prefix_val = filters.prefix_super_sub
        cmd.extend(["--include-super", "--include-sub"])

    if prefix_val:
        cmd.extend(["--prefix", prefix_val])

    # 3. List-based filters (using the --filter "key=value" format)
    if filters.peer_ips:
        # If it's a list, we add a generic filter for the comma-separated string
        ips_str = ",".join(str(ip) for ip in filters.peer_ips)
        cmd.extend(["--filter", f"peer_ips={ips_str}"])

    # 4. Enums and Literals
    if filters.update_type:
        # CLI accepts 'a' for announce and 'w' for withdraw
        val = "a" if filters.update_type == "announce" else "w"
        cmd.extend(["--elem-type", val])

    if filters.ip_version:
        if filters.ip_version == 4:
            cmd.append("--ipv4-only")
        elif filters.ip_version == 6:
            cmd.append("--ipv6-only")

    return cmd
```
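The parser backends above share one `FilterOptions` model but translate it differently: `PyBGPKITParser` passes a kwargs dict to `bgpkit.Parser`, `BGPKITParser` builds CLI flags via `build_bgpkit_cmd`, and `PyBGPStreamParser` builds a filter string via `generate_bgpstream_filters`. A sketch of the latter two translations; the filename is a placeholder and the expected outputs are inferred from the code above, not captured from a run.

```python
from pybgpkitstream.bgpparser import build_bgpkit_cmd, generate_bgpstream_filters
from pybgpkitstream.bgpstreamconfig import FilterOptions

f = FilterOptions(origin_asn=13335, update_type="announce")  # illustrative values

print(build_bgpkit_cmd("updates.20240101.0000.bz2", f))
# expected: ['bgpkit-parser', 'updates.20240101.0000.bz2',
#            '--origin-asn', '13335', '--elem-type', 'a']

print(generate_bgpstream_filters(f))
# expected: aspath "_13335$" and elemtype announcements
```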
pybgpkitstream-0.2.0/src/pybgpkitstream/bgpstreamconfig.py (new file, +160 -0):

```python
import datetime
import importlib
import shutil
from pydantic import BaseModel, Field, DirectoryPath, field_validator, model_validator
from typing import Literal
from ipaddress import IPv4Address, IPv6Address


class FilterOptions(BaseModel):
    """A unified model for the available filter options."""

    origin_asn: int | None = Field(
        default=None, description="Filter by the origin AS number."
    )
    prefix: str | None = Field(
        default=None, description="Filter by an exact prefix match."
    )
    prefix_super: str | None = Field(
        default=None,
        description="Filter by the exact prefix and its more general super-prefixes.",
    )
    prefix_sub: str | None = Field(
        default=None,
        description="Filter by the exact prefix and its more specific sub-prefixes.",
    )
    prefix_super_sub: str | None = Field(
        default=None,
        description="Filter by the exact prefix and both its super- and sub-prefixes.",
    )
    peer_ip: str | IPv4Address | IPv6Address | None = Field(
        default=None, description="Filter by the IP address of a single BGP peer."
    )
    peer_ips: list[str | IPv4Address | IPv6Address] | None = Field(
        default=None, description="Filter by a list of BGP peer IP addresses."
    )
    peer_asn: int | None = Field(
        default=None, description="Filter by the AS number of the BGP peer."
    )
    update_type: Literal["withdraw", "announce"] | None = Field(
        default=None, description="Filter by the BGP update message type."
    )
    as_path: str | None = Field(
        default=None, description="Filter by a regular expression matching the AS path."
    )
    ip_version: Literal[4, 6] | None = Field(
        default=None, description="Filter by ip version."
    )


class BGPStreamConfig(BaseModel):
    """Unified BGPStream config, compatible with BGPKIT and pybgpstream"""

    start_time: datetime.datetime = Field(description="Start of the stream")
    end_time: datetime.datetime = Field(description="End of the stream")
    collectors: list[str] = Field(description="List of collectors to get data from")
    data_types: list[Literal["ribs", "updates"]] = Field(
        description="List of archives files to consider (`ribs` or `updates`)"
    )

    filters: FilterOptions | None = Field(default=None, description="Optional filters")

    @field_validator("start_time", "end_time")
    @classmethod
    def normalize_to_utc(cls, dt: datetime.datetime) -> datetime.datetime:
        # if naive datetime (not timezone-aware) assume it's UTC
        if dt.tzinfo is None:
            return dt.replace(tzinfo=datetime.timezone.utc)
        # if timezone-aware, convert to utc
        else:
            return dt.astimezone(datetime.timezone.utc)


class PyBGPKITStreamConfig(BaseModel):
    """Unified BGPStream config and parameters related to PyBGPKIT implementation (all optional)"""

    bgpstream_config: BGPStreamConfig

    max_concurrent_downloads: int | None = Field(
        default=10, description="Maximum concurrent downloads of archive files."
    )

    cache_dir: DirectoryPath | None = Field(
        default=None,
        description="Specifies the directory for caching downloaded files.",
    )

    ram_fetch: bool | None = Field(
        default=True,
        description=(
            "If caching is disabled, fetch temp files in shared RAM memory (/dev/shm) or normal disk temp dir (/tmp). "
            "Default (True) improves performance and reduces disk wear, at the expense of increased RAM usage."
        ),
    )

    chunk_time: datetime.timedelta | None = Field(
        default=datetime.timedelta(hours=2),
        description=(
            "Interval for the fetch/parse cycles (benefits: avoid long prefetch time + periodic temps cleanup when caching is disabled). "
            "A smaller value means less RAM/disk used at the cost of performance."
        ),
    )

    parser: Literal["pybgpkit", "bgpkit", "pybgpstream", "bgpdump"] = Field(
        default="pybgpkit",
        description=(
            "MRT files parser. Default `pybgpkit` is installed but slow, the others are system dependencies."
        ),
    )

    @field_validator("parser")
    @classmethod
    def check_parser_available(cls, parser: str) -> str:
        if parser == "pybgpkit":
            if importlib.util.find_spec("bgpkit") is None:
                raise ValueError(
                    "pybgpkit is not installed. Install with: pip install pybgpkit"
                )

        elif parser == "pybgpstream":
            if importlib.util.find_spec("pybgpstream") is None:
                raise ValueError(
                    "pybgpstream is not installed. "
                    "Install with: pip install pybgpstream (ensure system dependencies are met)"
                )

        elif parser == "bgpdump":
            if shutil.which("bgpdump") is None:
                raise ValueError(
                    "bgpdump binary not found in PATH. "
                    "Install with: sudo apt-get install bgpdump"
                )

        elif parser == "bgpkit":
            if shutil.which("bgpkit-parser") is None:
                raise ValueError(
                    "bgpkit binary not found in PATH. "
                    "Install from: https://github.com/bgpkit/bgpkit-parser "
                    "or use cargo: cargo install bgpkit-parser"
                )

        # Return the parser value if validation passes
        return parser

    @model_validator(mode="before")
    @classmethod
    def nest_bgpstream_params(cls, data: dict) -> dict:
        """Allow defining a flat config"""
        # If the user already provided 'bgpstream_config', do nothing
        if "bgpstream_config" in data:
            return data

        # Define which fields belong to the inner BGPStreamConfig
        stream_fields = {"start_time", "end_time", "collectors", "data_types", "filters"}

        # Extract those fields from the flat input
        inner_data = {k: data.pop(k) for k in stream_fields if k in data}

        # Nest them back into the dictionary
        data["bgpstream_config"] = inner_data
        return data
```
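Because `nest_bgpstream_params` runs before validation, the nested `bgpstream_config` fields can be supplied flat; together with `BGPKITStream.from_config` this gives a config-first entry point. A sketch with illustrative values; note the `parser` validator raises unless the chosen backend is actually installed.

```python
from pybgpkitstream.bgpkitstream import BGPKITStream
from pybgpkitstream.bgpstreamconfig import PyBGPKITStreamConfig

config = PyBGPKITStreamConfig.model_validate(
    {
        # flat stream fields, nested into bgpstream_config by the validator
        "start_time": "2024-01-01T00:00:00",
        "end_time": "2024-01-01T01:00:00",
        "collectors": ["rrc00"],  # illustrative collector name
        "data_types": ["updates"],
        "parser": "pybgpkit",  # requires the pybgpkit bindings to be installed
    }
)
assert config.bgpstream_config.collectors == ["rrc00"]

stream = BGPKITStream.from_config(config)
```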
pybgpkitstream-0.2.0/src/pybgpkitstream/utils.py (new file, +11 -0):

```python
import datetime
import re

def dt_from_filepath(filepath: str, pattern=r"(\d{8}\.\d{4})") -> datetime.datetime:
    match = re.search(pattern, filepath)
    if not match:
        raise RuntimeError("Could not determine time from filepath")
    timestamp_str = match.group(1)
    dt = datetime.datetime.strptime(timestamp_str, "%Y%m%d.%H%M")
    dt = dt.replace(tzinfo=datetime.timezone.utc)
    return dt
```
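`dt_from_filepath` backs both the RIB ordering in `_iter_rib` and the fixed-timestamp behavior of `BGPKITParser`, by extracting the `YYYYMMDD.HHMM` stamp that collector archive names carry. A quick sketch with a made-up path:

```python
from pybgpkitstream.utils import dt_from_filepath

dt = dt_from_filepath("/tmp/cache/rib.20240101.0000.bz2")
print(dt.isoformat())  # 2024-01-01T00:00:00+00:00
```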
pybgpkitstream-0.1.6/src/pybgpkitstream/bgpstreamconfig.py (removed, +0 -82):

```python
import datetime
from pydantic import BaseModel, Field, DirectoryPath, field_validator
from typing import Literal
from ipaddress import IPv4Address, IPv6Address


class FilterOptions(BaseModel):
    """A unified model for the available filter options."""

    origin_asn: int | None = Field(
        default=None, description="Filter by the origin AS number."
    )
    prefix: str | None = Field(
        default=None, description="Filter by an exact prefix match."
    )
    prefix_super: str | None = Field(
        default=None,
        description="Filter by the exact prefix and its more general super-prefixes.",
    )
    prefix_sub: str | None = Field(
        default=None,
        description="Filter by the exact prefix and its more specific sub-prefixes.",
    )
    prefix_super_sub: str | None = Field(
        default=None,
        description="Filter by the exact prefix and both its super- and sub-prefixes.",
    )
    peer_ip: str | IPv4Address | IPv6Address | None = Field(
        default=None, description="Filter by the IP address of a single BGP peer."
    )
    peer_ips: list[str | IPv4Address | IPv6Address] | None = Field(
        default=None, description="Filter by a list of BGP peer IP addresses."
    )
    peer_asn: str | None = Field(
        default=None, description="Filter by the AS number of the BGP peer."
    )
    update_type: Literal["withdraw", "announce"] | None = Field(
        default=None, description="Filter by the BGP update message type."
    )
    as_path: str | None = Field(
        default=None, description="Filter by a regular expression matching the AS path."
    )
    ip_version: Literal["ipv4", "ipv6"] | None = Field(
        default=None, description="Filter by ip version."
    )


class BGPStreamConfig(BaseModel):
    """
    Unified BGPStream config.

    Filters are primarily written for BGPKit but utils to convert to pybgpstream are provided in tests/pybgpstream_utils.
    """

    start_time: datetime.datetime = Field(description="Start of the stream")
    end_time: datetime.datetime = Field(description="End of the stream")
    collectors: list[str] = Field(description="List of collectors to get data from")
    data_types: list[Literal["ribs", "updates"]] = Field(
        description="List of archives files to consider (`ribs` or `updates`)"
    )
    cache_dir: DirectoryPath | None = Field(
        default=None,
        description="Specifies the directory for caching downloaded files.",
    )
    filters: FilterOptions | None = Field(default=None, description="Optional filters")
    max_concurrent_downloads: int | None = Field(
        default=None, description="Maximum concurrent downloads when caching"
    )
    chunk_time: datetime.timedelta | None = Field(
        default=datetime.timedelta(hours=2),
        description="Interval for the fetch/parse cycle (avoid long prefetch time)",
    )

    @field_validator("start_time", "end_time")
    @classmethod
    def normalize_to_utc(cls, dt: datetime.datetime) -> datetime.datetime:
        # if naive datetime (not timezone-aware) assume it's UTC
        if dt.tzinfo is None:
            return dt.replace(tzinfo=datetime.timezone.utc)
        # if timezone-aware, convert to utc
        else:
            return dt.astimezone(datetime.timezone.utc)
```
Files without changes: README.md, src/pybgpkitstream/bgpelement.py, src/pybgpkitstream/cli.py, src/pybgpkitstream/py.typed.