pybgpkitstream 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pybgpkitstream/bgpparser.py +45 -7
- pybgpkitstream/cli.py +29 -11
- pybgpkitstream-0.3.0.dist-info/METADATA +71 -0
- {pybgpkitstream-0.2.0.dist-info → pybgpkitstream-0.3.0.dist-info}/RECORD +6 -6
- pybgpkitstream-0.2.0.dist-info/METADATA +0 -79
- {pybgpkitstream-0.2.0.dist-info → pybgpkitstream-0.3.0.dist-info}/WHEEL +0 -0
- {pybgpkitstream-0.2.0.dist-info → pybgpkitstream-0.3.0.dist-info}/entry_points.txt +0 -0
pybgpkitstream/bgpparser.py
CHANGED
|
@@ -6,6 +6,7 @@ import re
|
|
|
6
6
|
import ipaddress
|
|
7
7
|
import subprocess as sp
|
|
8
8
|
from pybgpkitstream.utils import dt_from_filepath
|
|
9
|
+
import logging
|
|
9
10
|
|
|
10
11
|
try:
|
|
11
12
|
import pybgpstream
|
|
@@ -36,12 +37,21 @@ class PyBGPKITParser(BGPParser):
|
|
|
36
37
|
self.parser = None # placeholder for lazy instantiation
|
|
37
38
|
self.is_rib = is_rib
|
|
38
39
|
self.collector = collector
|
|
39
|
-
self.filters = filters.model_dump(exclude_unset=True)
|
|
40
|
+
self.filters: dict = filters.model_dump(exclude_unset=True, exclude_none=True)
|
|
40
41
|
# cast int ipv to pybgpkit ipv4 or ipv6 string
|
|
41
42
|
if "ip_version" in self.filters:
|
|
42
43
|
ipv_int = self.filters["ip_version"]
|
|
43
44
|
if ipv_int:
|
|
44
45
|
self.filters["ip_version"] = f"ipv{ipv_int}"
|
|
46
|
+
if self.filters.get("peer_asn"):
|
|
47
|
+
self.filters["peer_asn"] = str(self.filters["peer_asn"])
|
|
48
|
+
if self.filters.get("origin_asn"):
|
|
49
|
+
self.filters["origin_asn"] = str(self.filters["origin_asn"])
|
|
50
|
+
if self.filters.get("update_type"):
|
|
51
|
+
val = self.filters.pop("update_type")
|
|
52
|
+
self.filters["type"] = val
|
|
53
|
+
if self.filters.get("peer_ips"):
|
|
54
|
+
self.filters["peer_ips"] = ", ".join(self.filters["peer_ips"])
|
|
45
55
|
|
|
46
56
|
def _convert(self, element) -> BGPElement:
|
|
47
57
|
return BGPElement(
|
|
@@ -154,16 +164,44 @@ class PyBGPStreamParser(BGPParser):
|
|
|
154
164
|
):
|
|
155
165
|
self.filepath = filepath
|
|
156
166
|
self.collector = collector
|
|
157
|
-
self.filters =
|
|
167
|
+
self.filters = filters
|
|
158
168
|
|
|
159
|
-
def
|
|
160
|
-
|
|
169
|
+
def _iter_normal(self):
    """Yield the elements of the file, delegating any filtering to pybgpstream.

    Used when no filter is set or when every requested filter can be
    expressed as a pybgpstream filter string. Each yielded element has its
    ``collector`` attribute overwritten with this parser's collector.
    """
    # Only build a filter string when filters were actually provided.
    native_filter = None
    if self.filters:
        native_filter = generate_bgpstream_filters(self.filters)

    reader = pybgpstream.BGPStream(
        data_interface="singlefile",
        filter=native_filter,
    )
    reader.set_data_interface_option("singlefile", "rib-file", self.filepath)

    for record in reader:
        record.collector = self.collector
        yield record
|
|
180
|
+
|
|
181
|
+
def _iter_python_filter(self):
    """Yield the elements of the file, filtering on peer address in Python.

    Used when a peer-IP filter is requested: the filter string produced by
    ``generate_bgpstream_filters`` is still handed to pybgpstream, and
    elements whose ``peer_address`` is not in ``self.filters.peer_ips`` are
    dropped here. Each yielded element has its ``collector`` attribute
    overwritten with this parser's collector.
    """
    native_filter = generate_bgpstream_filters(self.filters)
    reader = pybgpstream.BGPStream(
        data_interface="singlefile",
        # An empty/None filter string is normalised to None.
        filter=native_filter or None,
    )
    reader.set_data_interface_option("singlefile", "rib-file", self.filepath)

    # Build the lookup set once, outside the loop.
    wanted_peers = set(self.filters.peer_ips)

    for record in reader:
        if record.peer_address in wanted_peers:
            record.collector = self.collector
            yield record
|
|
166
196
|
|
|
197
|
+
def __iter__(self):
    """Return an iterator over the file's elements, picking the filter strategy.

    Returns the generator from ``_iter_normal`` when no peer-IP filter is
    requested (including when no filters were given at all), and the
    generator from ``_iter_python_filter`` otherwise.
    """
    filters = self.filters

    # ``self.filters`` may be None (``_iter_normal`` already guards against a
    # missing filter object), so it must not be dereferenced unconditionally.
    if filters is None or not (filters.peer_ip or filters.peer_ips):
        return self._iter_normal()

    if filters.peer_ip:
        # A single peer_ip takes precedence and replaces any peer_ips list.
        # NOTE(review): this mutates the filter object shared with the caller
        # and discards peer_ips when both are set -- confirm this is intended.
        filters.peer_ips = [filters.peer_ip]
    return self._iter_python_filter()
|
|
204
|
+
|
|
167
205
|
|
|
168
206
|
class BGPdumpParser(BGPParser):
|
|
169
207
|
"""Run bgpdump as a subprocess. I might have over-engineered the filtering."""
|
|
@@ -362,12 +400,12 @@ def generate_bgpstream_filters(f: FilterOptions) -> str | None:
|
|
|
362
400
|
parts.append(f"prefix any {f.prefix_super_sub}")
|
|
363
401
|
|
|
364
402
|
if f.ip_version:
|
|
365
|
-
parts.append(f"ipversion {f.ip_version
|
|
403
|
+
parts.append(f"ipversion {f.ip_version}")
|
|
366
404
|
|
|
367
405
|
# Warn about unsupported fields
|
|
368
406
|
if f.peer_ip or f.peer_ips:
|
|
369
|
-
|
|
370
|
-
"
|
|
407
|
+
logging.info(
|
|
408
|
+
"Filtering by peer_ip is not supported natively by pybgpstream (falling back to python-side filtering)"
|
|
371
409
|
)
|
|
372
410
|
|
|
373
411
|
# Join all parts with 'and' as required by the parser
|
pybgpkitstream/cli.py
CHANGED
|
@@ -2,8 +2,12 @@ import argparse
|
|
|
2
2
|
import sys
|
|
3
3
|
import datetime
|
|
4
4
|
|
|
5
|
-
from pybgpkitstream import
|
|
6
|
-
|
|
5
|
+
from pybgpkitstream import (
|
|
6
|
+
BGPStreamConfig,
|
|
7
|
+
FilterOptions,
|
|
8
|
+
PyBGPKITStreamConfig,
|
|
9
|
+
BGPKITStream,
|
|
10
|
+
)
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
def main():
|
|
@@ -40,12 +44,6 @@ def main():
|
|
|
40
44
|
default=["updates"],
|
|
41
45
|
help="List of archives to consider ('ribs' or 'updates').",
|
|
42
46
|
)
|
|
43
|
-
parser.add_argument(
|
|
44
|
-
"--cache-dir",
|
|
45
|
-
type=str,
|
|
46
|
-
default=None,
|
|
47
|
-
help="Directory for caching downloaded files.",
|
|
48
|
-
)
|
|
49
47
|
|
|
50
48
|
# Arguments for FilterOptions
|
|
51
49
|
parser.add_argument(
|
|
@@ -93,7 +91,7 @@ def main():
|
|
|
93
91
|
)
|
|
94
92
|
parser.add_argument(
|
|
95
93
|
"--peer-asn",
|
|
96
|
-
type=
|
|
94
|
+
type=int,
|
|
97
95
|
default=None,
|
|
98
96
|
help="Filter by the AS number of the BGP peer.",
|
|
99
97
|
)
|
|
@@ -111,6 +109,20 @@ def main():
|
|
|
111
109
|
help="Filter by a regular expression matching the AS path.",
|
|
112
110
|
)
|
|
113
111
|
|
|
112
|
+
# PyBGPKITStream implementation parameters
|
|
113
|
+
parser.add_argument(
|
|
114
|
+
"--cache-dir",
|
|
115
|
+
type=str,
|
|
116
|
+
default=None,
|
|
117
|
+
help="Directory for caching downloaded files.",
|
|
118
|
+
)
|
|
119
|
+
parser.add_argument(
|
|
120
|
+
"--parser",
|
|
121
|
+
type=str,
|
|
122
|
+
choices=["pybgpkit", "bgpkit", "pybgpstream", "bgpdump"],
|
|
123
|
+
default="pybgpkit",
|
|
124
|
+
)
|
|
125
|
+
|
|
114
126
|
args = parser.parse_args()
|
|
115
127
|
|
|
116
128
|
filter_options = FilterOptions(
|
|
@@ -130,19 +142,25 @@ def main():
|
|
|
130
142
|
if all(value is None for value in filter_options.model_dump().values()):
|
|
131
143
|
filter_options = None
|
|
132
144
|
|
|
133
|
-
|
|
145
|
+
bgpstream_config = BGPStreamConfig(
|
|
134
146
|
start_time=args.start_time,
|
|
135
147
|
end_time=args.end_time,
|
|
136
148
|
collectors=args.collectors,
|
|
137
149
|
data_types=args.data_types,
|
|
138
|
-
cache_dir=args.cache_dir,
|
|
139
150
|
filters=filter_options,
|
|
140
151
|
)
|
|
141
152
|
|
|
153
|
+
config = PyBGPKITStreamConfig(
|
|
154
|
+
bgpstream_config=bgpstream_config, cache_dir=args.cache_dir, parser=args.parser
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
for element in BGPKITStream.from_config(config):
|
|
158
|
+
print(element)
|
|
142
159
|
try:
|
|
143
160
|
for element in BGPKITStream.from_config(config):
|
|
144
161
|
print(element)
|
|
145
162
|
except Exception as e:
|
|
163
|
+
print(e)
|
|
146
164
|
print(f"An error occurred during streaming: {e}", file=sys.stderr)
|
|
147
165
|
sys.exit(1)
|
|
148
166
|
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pybgpkitstream
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Drop-in replacement for PyBGPStream using BGPKIT
|
|
5
|
+
Author: JustinLoye
|
|
6
|
+
Author-email: JustinLoye <jloye@iij.ad.jp>
|
|
7
|
+
Requires-Dist: aiohttp>=3.12.15
|
|
8
|
+
Requires-Dist: pybgpkit>=0.6.2
|
|
9
|
+
Requires-Dist: pydantic>=2.11.9
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# PyBGPKITStream
|
|
14
|
+
|
|
15
|
+
A drop-in replacement for PyBGPStream using BGPKIT
|
|
16
|
+
|
|
17
|
+
## Features
|
|
18
|
+
|
|
19
|
+
- Generates time-ordered BGP messages on the fly from the RIB and update MRT files of multiple collectors
|
|
20
|
+
- Streams the same BGP messages as PyBGPStream, enabling seamless, drop-in replacement
|
|
21
|
+
- Lazy loading consumes minimal memory, making it suitable for large datasets
|
|
22
|
+
- Multiple BGP parsers supported: `pybgpkit` (default but slow), `bgpkit-parser`, `bgpdump` and `pybgpstream` single file backend (the latter three are system dependencies)
|
|
23
|
+
- Caching with concurrent downloading, fully compatible with the BGPKIT parser's caching functionality
|
|
24
|
+
- Performance: for updates, typically 3–10× faster than PyBGPStream; for RIB-only processing, currently about 3–4× slower (see [perf.md](perf.md) for test details).
|
|
25
|
+
- A CLI tool
|
|
26
|
+
|
|
27
|
+
## Quick start
|
|
28
|
+
|
|
29
|
+
Installation:
|
|
30
|
+
|
|
31
|
+
```sh
|
|
32
|
+
pip install pybgpkitstream
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Usage:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import datetime
|
|
39
|
+
from pybgpkitstream import BGPStreamConfig, BGPKITStream
|
|
40
|
+
|
|
41
|
+
config = BGPStreamConfig(
|
|
42
|
+
start_time=datetime.datetime(2010, 9, 1, 0, 0),
|
|
43
|
+
end_time=datetime.datetime(2010, 9, 1, 1, 59),
|
|
44
|
+
collectors=["route-views.sydney", "route-views.wide"],
|
|
45
|
+
data_types=["ribs", "updates"],
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
stream = BGPKITStream.from_config(config)
|
|
49
|
+
|
|
50
|
+
n_elems = 0
|
|
51
|
+
for elem in stream:
|
|
52
|
+
n_elems += 1
|
|
53
|
+
|
|
54
|
+
print(f"Processed {n_elems} BGP elements")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
or in the terminal:
|
|
58
|
+
|
|
59
|
+
```sh
|
|
60
|
+
pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Motivation
|
|
64
|
+
|
|
65
|
+
PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
|
|
66
|
+
BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
|
|
67
|
+
|
|
68
|
+
## Missing features
|
|
69
|
+
|
|
70
|
+
- live mode (I plan to add semi-live soon.)
|
|
71
|
+
- `pybgpkitstream.BGPElement` is not fully compatible with `pybgpstream.BGPElem`: missing record_type, project, router, router_ip
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
pybgpkitstream/__init__.py,sha256=OGWVhZdSvialNkIkQ1VBrmiyOcwkCA1D5IaLo7WQnPI,209
|
|
2
2
|
pybgpkitstream/bgpelement.py,sha256=7mXSUmWThhIbKy2JVsLchoteve0BshT3uH8cdbAe0Go,1176
|
|
3
3
|
pybgpkitstream/bgpkitstream.py,sha256=CKQv7dU-ooznuD1AjHKnZ6qRdPH1ZiOIEGtVNtU8PCY,15062
|
|
4
|
-
pybgpkitstream/bgpparser.py,sha256=
|
|
4
|
+
pybgpkitstream/bgpparser.py,sha256=eDTWV6iGZTxgF7m78UmBGLDDY6lGOc9TWsfSdJLhiY8,16264
|
|
5
5
|
pybgpkitstream/bgpstreamconfig.py,sha256=vIfEN475WDIZ7kGmi3dnj_1GIQE_r6qkDFETZmMvH5E,6199
|
|
6
|
-
pybgpkitstream/cli.py,sha256=
|
|
6
|
+
pybgpkitstream/cli.py,sha256=4F5yCNW6OcQFJAj2Hp0rBrDmUDkR1O9x-_aKhVpXrL4,4641
|
|
7
7
|
pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pybgpkitstream/utils.py,sha256=6FwEEpBtY_20BDlJPOPFmTYQGqw7fCBLjXmnd7gjBdQ,404
|
|
9
|
-
pybgpkitstream-0.
|
|
10
|
-
pybgpkitstream-0.
|
|
11
|
-
pybgpkitstream-0.
|
|
12
|
-
pybgpkitstream-0.
|
|
9
|
+
pybgpkitstream-0.3.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
10
|
+
pybgpkitstream-0.3.0.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
|
|
11
|
+
pybgpkitstream-0.3.0.dist-info/METADATA,sha256=kzkTcOUY8tHeZxIqbCo4pRrfQ3z3ZIzWvAbNZ5ULlfM,2356
|
|
12
|
+
pybgpkitstream-0.3.0.dist-info/RECORD,,
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: pybgpkitstream
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Drop-in replacement for PyBGPStream using BGPKIT
|
|
5
|
-
Author: JustinLoye
|
|
6
|
-
Author-email: JustinLoye <jloye@iij.ad.jp>
|
|
7
|
-
Requires-Dist: aiohttp>=3.12.15
|
|
8
|
-
Requires-Dist: pybgpkit>=0.6.2
|
|
9
|
-
Requires-Dist: pydantic>=2.11.9
|
|
10
|
-
Requires-Python: >=3.10
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
|
|
13
|
-
# PyBGPKITStream
|
|
14
|
-
|
|
15
|
-
A drop-in replacement for PyBGPStream using BGPKIT
|
|
16
|
-
|
|
17
|
-
## Features
|
|
18
|
-
|
|
19
|
-
- Effortless BGP Stream Switching:
|
|
20
|
-
- Seamless, drop-in replacement ([example](tests/test_stream.py#L38))
|
|
21
|
-
- Lazy message generation: generates time-ordered BGP messages on the fly, consuming minimal memory and making it suitable for large datasets
|
|
22
|
-
- Supports multiple route collectors
|
|
23
|
-
- Supports both ribs and updates
|
|
24
|
-
- Caching with concurrent downloading is enabled and is fully compatible with the BGPKIT parser's caching functionality.
|
|
25
|
-
- [Similar performance to PyBGPStream](examples/perf.ipynb)
|
|
26
|
-
- A CLI tool
|
|
27
|
-
|
|
28
|
-
## Quick start
|
|
29
|
-
|
|
30
|
-
Installation:
|
|
31
|
-
|
|
32
|
-
```sh
|
|
33
|
-
pip install pybgpkitstream
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
Usage:
|
|
37
|
-
|
|
38
|
-
```python
|
|
39
|
-
import datetime
|
|
40
|
-
from pybgpkitstream import BGPStreamConfig, BGPKITStream
|
|
41
|
-
|
|
42
|
-
config = BGPStreamConfig(
|
|
43
|
-
start_time=datetime.datetime(2010, 9, 1, 0, 0),
|
|
44
|
-
end_time=datetime.datetime(2010, 9, 1, 1, 59),
|
|
45
|
-
collectors=["route-views.sydney", "route-views.wide"],
|
|
46
|
-
data_types=["ribs", "updates"],
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
stream = BGPKITStream.from_config(config)
|
|
50
|
-
|
|
51
|
-
n_elems = 0
|
|
52
|
-
for _ in stream:
|
|
53
|
-
n_elems += 1
|
|
54
|
-
|
|
55
|
-
print(f"Processed {n_elems} BGP elements")
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
or in the terminal:
|
|
59
|
-
|
|
60
|
-
```sh
|
|
61
|
-
pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
|
|
62
|
-
```
|
|
63
|
-
|
|
64
|
-
## Motivation
|
|
65
|
-
|
|
66
|
-
PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
|
|
67
|
-
BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
|
|
68
|
-
|
|
69
|
-
## Missing features
|
|
70
|
-
|
|
71
|
-
- live mode
|
|
72
|
-
- `pybgpkitstream.BGPElement` is not fully compatible with `pybgpstream.BGPElem`: missing record_type (BGPKIT limitation), project (BGPKIT limitation), router (could be improved), router_ip (could be improved)
|
|
73
|
-
- CLI output is not yet compatible with `bgpdump -m` or `bgpreader` (right now a similar-looking output is produced)
|
|
74
|
-
|
|
75
|
-
## Issues
|
|
76
|
-
|
|
77
|
-
- Program will crash when working with many update files per collector (~ more than few hours of data), only when caching is disabled. This might be caused by [BGPKIT parser not being lazy](https://github.com/bgpkit/bgpkit-parser/pull/239). See [details and workaround fix](examples/many_updates.ipynb)
|
|
78
|
-
- Filters are designed with BGPKIT in mind, and can slightly differ to pybgpstream. See [this file](tests/pybgpstream_utils.py) for a conversion to PyBGPStream filter. Note that for now the filters have not been heavily tested...
|
|
79
|
-
- ... just like the rest of the project. Use at your own risk. The only tests I did are in /tests
|
|
File without changes
|
|
File without changes
|