pybgpkitstream 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import re
6
6
  import ipaddress
7
7
  import subprocess as sp
8
8
  from pybgpkitstream.utils import dt_from_filepath
9
+ import logging
9
10
 
10
11
  try:
11
12
  import pybgpstream
@@ -36,12 +37,21 @@ class PyBGPKITParser(BGPParser):
36
37
  self.parser = None # placeholder for lazy instantiation
37
38
  self.is_rib = is_rib
38
39
  self.collector = collector
39
- self.filters = filters.model_dump(exclude_unset=True)
40
+ self.filters: dict = filters.model_dump(exclude_unset=True, exclude_none=True)
40
41
  # cast int ipv to pybgpkit ipv4 or ipv6 string
41
42
  if "ip_version" in self.filters:
42
43
  ipv_int = self.filters["ip_version"]
43
44
  if ipv_int:
44
45
  self.filters["ip_version"] = f"ipv{ipv_int}"
46
+ if self.filters.get("peer_asn"):
47
+ self.filters["peer_asn"] = str(self.filters["peer_asn"])
48
+ if self.filters.get("origin_asn"):
49
+ self.filters["origin_asn"] = str(self.filters["origin_asn"])
50
+ if self.filters.get("update_type"):
51
+ val = self.filters.pop("update_type")
52
+ self.filters["type"] = val
53
+ if self.filters.get("peer_ips"):
54
+ self.filters["peer_ips"] = ", ".join(self.filters["peer_ips"])
45
55
 
46
56
  def _convert(self, element) -> BGPElement:
47
57
  return BGPElement(
@@ -154,16 +164,44 @@ class PyBGPStreamParser(BGPParser):
154
164
  ):
155
165
  self.filepath = filepath
156
166
  self.collector = collector
157
- self.filters = generate_bgpstream_filters(filters) if filters else None
167
+ self.filters = filters
158
168
 
159
- def __iter__(self):
160
- stream = pybgpstream.BGPStream(data_interface="singlefile", filter=self.filters)
169
+ def _iter_normal(self):
170
+ """when there is no filter or filters are supported by pybgpstream"""
171
+ stream = pybgpstream.BGPStream(
172
+ data_interface="singlefile",
173
+ filter=generate_bgpstream_filters(self.filters) if self.filters else None,
174
+ )
175
+ stream.set_data_interface_option("singlefile", "rib-file", self.filepath)
176
+
177
+ for elem in stream:
178
+ elem.collector = self.collector
179
+ yield elem
180
+
181
+ def _iter_python_filter(self):
182
+ """when filters are not supported by pybgpstream, filter from the python side"""
183
+ bgpstream_filter = generate_bgpstream_filters(self.filters)
184
+ stream = pybgpstream.BGPStream(
185
+ data_interface="singlefile",
186
+ filter=bgpstream_filter if bgpstream_filter else None,
187
+ )
161
188
  stream.set_data_interface_option("singlefile", "rib-file", self.filepath)
189
+ peer_ips = set(self.filters.peer_ips)
162
190
 
163
191
  for elem in stream:
192
+ if elem.peer_address not in peer_ips:
193
+ continue
164
194
  elem.collector = self.collector
165
195
  yield elem
166
196
 
197
+ def __iter__(self):
198
+ if not self.filters.peer_ip and not self.filters.peer_ips:
199
+ return self._iter_normal()
200
+ else:
201
+ if self.filters.peer_ip:
202
+ self.filters.peer_ips = [self.filters.peer_ip]
203
+ return self._iter_python_filter()
204
+
167
205
 
168
206
  class BGPdumpParser(BGPParser):
169
207
  """Run bgpdump as a subprocess. I might have over-engineered the filtering."""
@@ -362,12 +400,12 @@ def generate_bgpstream_filters(f: FilterOptions) -> str | None:
362
400
  parts.append(f"prefix any {f.prefix_super_sub}")
363
401
 
364
402
  if f.ip_version:
365
- parts.append(f"ipversion {f.ip_version[-1]}")
403
+ parts.append(f"ipversion {f.ip_version}")
366
404
 
367
405
  # Warn about unsupported fields
368
406
  if f.peer_ip or f.peer_ips:
369
- print(
370
- "Warning: peer_ip and peer_ips are not supported by this BGPStream filter string parser and will be ignored."
407
+ logging.info(
408
+ "Filtering by peer_ip is not supported natively by pybgpstream (falling back to python-side filtering)"
371
409
  )
372
410
 
373
411
  # Join all parts with 'and' as required by the parser
pybgpkitstream/cli.py CHANGED
@@ -2,8 +2,12 @@ import argparse
2
2
  import sys
3
3
  import datetime
4
4
 
5
- from pybgpkitstream import BGPStreamConfig, FilterOptions
6
- from pybgpkitstream import BGPKITStream
5
+ from pybgpkitstream import (
6
+ BGPStreamConfig,
7
+ FilterOptions,
8
+ PyBGPKITStreamConfig,
9
+ BGPKITStream,
10
+ )
7
11
 
8
12
 
9
13
  def main():
@@ -40,12 +44,6 @@ def main():
40
44
  default=["updates"],
41
45
  help="List of archives to consider ('ribs' or 'updates').",
42
46
  )
43
- parser.add_argument(
44
- "--cache-dir",
45
- type=str,
46
- default=None,
47
- help="Directory for caching downloaded files.",
48
- )
49
47
 
50
48
  # Arguments for FilterOptions
51
49
  parser.add_argument(
@@ -93,7 +91,7 @@ def main():
93
91
  )
94
92
  parser.add_argument(
95
93
  "--peer-asn",
96
- type=str,
94
+ type=int,
97
95
  default=None,
98
96
  help="Filter by the AS number of the BGP peer.",
99
97
  )
@@ -111,6 +109,20 @@ def main():
111
109
  help="Filter by a regular expression matching the AS path.",
112
110
  )
113
111
 
112
+ # PyBGPKITStream implementation parameters
113
+ parser.add_argument(
114
+ "--cache-dir",
115
+ type=str,
116
+ default=None,
117
+ help="Directory for caching downloaded files.",
118
+ )
119
+ parser.add_argument(
120
+ "--parser",
121
+ type=str,
122
+ choices=["pybgpkit", "bgpkit", "pybgpstream", "bgpdump"],
123
+ default="pybgpkit",
124
+ )
125
+
114
126
  args = parser.parse_args()
115
127
 
116
128
  filter_options = FilterOptions(
@@ -130,19 +142,25 @@ def main():
130
142
  if all(value is None for value in filter_options.model_dump().values()):
131
143
  filter_options = None
132
144
 
133
- config = BGPStreamConfig(
145
+ bgpstream_config = BGPStreamConfig(
134
146
  start_time=args.start_time,
135
147
  end_time=args.end_time,
136
148
  collectors=args.collectors,
137
149
  data_types=args.data_types,
138
- cache_dir=args.cache_dir,
139
150
  filters=filter_options,
140
151
  )
141
152
 
153
+ config = PyBGPKITStreamConfig(
154
+ bgpstream_config=bgpstream_config, cache_dir=args.cache_dir, parser=args.parser
155
+ )
156
+
157
+ for element in BGPKITStream.from_config(config):
158
+ print(element)
142
159
  try:
143
160
  for element in BGPKITStream.from_config(config):
144
161
  print(element)
145
162
  except Exception as e:
163
+ print(e)
146
164
  print(f"An error occurred during streaming: {e}", file=sys.stderr)
147
165
  sys.exit(1)
148
166
 
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.3
2
+ Name: pybgpkitstream
3
+ Version: 0.3.0
4
+ Summary: Drop-in replacement for PyBGPStream using BGPKIT
5
+ Author: JustinLoye
6
+ Author-email: JustinLoye <jloye@iij.ad.jp>
7
+ Requires-Dist: aiohttp>=3.12.15
8
+ Requires-Dist: pybgpkit>=0.6.2
9
+ Requires-Dist: pydantic>=2.11.9
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+
13
+ # PyBGPKITStream
14
+
15
+ A drop-in replacement for PyBGPStream using BGPKIT
16
+
17
+ ## Features
18
+
19
+ - Generates time-ordered BGP messages on the fly from the RIB and update MRT files of multiple collectors
20
+ - Streams the same BGP messages as PyBGPStream, enabling seamless, drop-in replacement
21
+ - Lazy loading consumes minimal memory, making it suitable for large datasets
22
+ - Multiple BGP parsers supported: `pybgpkit` (default but slow), `bgpkit-parser`, `bgpdump` and `pybgpstream` single file backend (the latter three are system dependencies)
23
+ - Caching with concurrent downloading fully compatible with the BGPKIT parser's caching functionality.
24
+ - Performance: for updates, typically 3–10× faster than PyBGPStream; for RIB-only processing, currently about 3–4× slower (see [perf.md](perf.md) for test details).
25
+ - A CLI tool
26
+
27
+ ## Quick start
28
+
29
+ Installation:
30
+
31
+ ```sh
32
+ pip install pybgpkitstream
33
+ ```
34
+
35
+ Usage:
36
+
37
+ ```python
38
+ import datetime
39
+ from pybgpkitstream import BGPStreamConfig, BGPKITStream
40
+
41
+ config = BGPStreamConfig(
42
+ start_time=datetime.datetime(2010, 9, 1, 0, 0),
43
+ end_time=datetime.datetime(2010, 9, 1, 1, 59),
44
+ collectors=["route-views.sydney", "route-views.wide"],
45
+ data_types=["ribs", "updates"],
46
+ )
47
+
48
+ stream = BGPKITStream.from_config(config)
49
+
50
+ n_elems = 0
51
+ for elem in stream:
52
+ n_elems += 1
53
+
54
+ print(f"Processed {n_elems} BGP elements")
55
+ ```
56
+
57
+ or in the terminal:
58
+
59
+ ```sh
60
+ pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
61
+ ```
62
+
63
+ ## Motivation
64
+
65
+ PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
66
+ BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
67
+
68
+ ## Missing features
69
+
70
+ - live mode (I plan to add semi-live soon.)
71
+ - `pybgpkitstream.BGPElement` is not fully compatible with `pybgpstream.BGPElem`: missing record_type, project, router, router_ip
@@ -1,12 +1,12 @@
1
1
  pybgpkitstream/__init__.py,sha256=OGWVhZdSvialNkIkQ1VBrmiyOcwkCA1D5IaLo7WQnPI,209
2
2
  pybgpkitstream/bgpelement.py,sha256=7mXSUmWThhIbKy2JVsLchoteve0BshT3uH8cdbAe0Go,1176
3
3
  pybgpkitstream/bgpkitstream.py,sha256=CKQv7dU-ooznuD1AjHKnZ6qRdPH1ZiOIEGtVNtU8PCY,15062
4
- pybgpkitstream/bgpparser.py,sha256=aJcVCv_ydy3xQcH_BBxQE4hc7G1rLYqqNJAXCdnrasA,14689
4
+ pybgpkitstream/bgpparser.py,sha256=eDTWV6iGZTxgF7m78UmBGLDDY6lGOc9TWsfSdJLhiY8,16264
5
5
  pybgpkitstream/bgpstreamconfig.py,sha256=vIfEN475WDIZ7kGmi3dnj_1GIQE_r6qkDFETZmMvH5E,6199
6
- pybgpkitstream/cli.py,sha256=E0E1hO0fzGhy1skBopRufdewsiSy_mA-J8Gf2WxBRxo,4214
6
+ pybgpkitstream/cli.py,sha256=4F5yCNW6OcQFJAj2Hp0rBrDmUDkR1O9x-_aKhVpXrL4,4641
7
7
  pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  pybgpkitstream/utils.py,sha256=6FwEEpBtY_20BDlJPOPFmTYQGqw7fCBLjXmnd7gjBdQ,404
9
- pybgpkitstream-0.2.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
10
- pybgpkitstream-0.2.0.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
11
- pybgpkitstream-0.2.0.dist-info/METADATA,sha256=EBkydfiRKhgjAz4NotQqix2wX9uSHLK3iDQ_1oUSEqU,2953
12
- pybgpkitstream-0.2.0.dist-info/RECORD,,
9
+ pybgpkitstream-0.3.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
10
+ pybgpkitstream-0.3.0.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
11
+ pybgpkitstream-0.3.0.dist-info/METADATA,sha256=kzkTcOUY8tHeZxIqbCo4pRrfQ3z3ZIzWvAbNZ5ULlfM,2356
12
+ pybgpkitstream-0.3.0.dist-info/RECORD,,
@@ -1,79 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: pybgpkitstream
3
- Version: 0.2.0
4
- Summary: Drop-in replacement for PyBGPStream using BGPKIT
5
- Author: JustinLoye
6
- Author-email: JustinLoye <jloye@iij.ad.jp>
7
- Requires-Dist: aiohttp>=3.12.15
8
- Requires-Dist: pybgpkit>=0.6.2
9
- Requires-Dist: pydantic>=2.11.9
10
- Requires-Python: >=3.10
11
- Description-Content-Type: text/markdown
12
-
13
- # PyBGPKITStream
14
-
15
- A drop-in replacement for PyBGPStream using BGPKIT
16
-
17
- ## Features
18
-
19
- - Effortless BGP Stream Switching:
20
- - Seamless, drop-in replacement ([example](tests/test_stream.py#L38))
21
- - Lazy message generation: generates time-ordered BGP messages on the fly, consuming minimal memory and making it suitable for large datasets
22
- - Supports multiple route collectors
23
- - Supports both ribs and updates
24
- - Caching with concurrent downloading is enabled and is fully compatible with the BGPKIT parser's caching functionality.
25
- - [Similar performance to PyBGPStream](examples/perf.ipynb)
26
- - A CLI tool
27
-
28
- ## Quick start
29
-
30
- Installation:
31
-
32
- ```sh
33
- pip install pybgpkitstream
34
- ```
35
-
36
- Usage:
37
-
38
- ```python
39
- import datetime
40
- from pybgpkitstream import BGPStreamConfig, BGPKITStream
41
-
42
- config = BGPStreamConfig(
43
- start_time=datetime.datetime(2010, 9, 1, 0, 0),
44
- end_time=datetime.datetime(2010, 9, 1, 1, 59),
45
- collectors=["route-views.sydney", "route-views.wide"],
46
- data_types=["ribs", "updates"],
47
- )
48
-
49
- stream = BGPKITStream.from_config(config)
50
-
51
- n_elems = 0
52
- for _ in stream:
53
- n_elems += 1
54
-
55
- print(f"Processed {n_elems} BGP elements")
56
- ```
57
-
58
- or in the terminal:
59
-
60
- ```sh
61
- pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
62
- ```
63
-
64
- ## Motivation
65
-
66
- PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
67
- BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
68
-
69
- ## Missing features
70
-
71
- - live mode
72
- - `pybgpkitstream.BGPElement` is not fully compatible with `pybgpstream.BGPElem`: missing record_type (BGPKIT limitation), project (BGPKIT limitation), router (could be improved), router_ip (could be improved)
73
- - CLI output is not yet compatible with `bgpdump -m` or `bgpreader` (right now a similar-looking output is produced)
74
-
75
- ## Issues
76
-
77
- - The program will crash when working with many update files per collector (roughly more than a few hours of data), but only when caching is disabled. This might be caused by [BGPKIT parser not being lazy](https://github.com/bgpkit/bgpkit-parser/pull/239). See [details and workaround fix](examples/many_updates.ipynb)
78
- - Filters are designed with BGPKIT in mind and can differ slightly from those of PyBGPStream. See [this file](tests/pybgpstream_utils.py) for a conversion to a PyBGPStream filter. Note that for now the filters have not been heavily tested...
79
- - ... just like the rest of the project. Use at your own risk. The only tests I did are in /tests