pybgpkitstream 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,17 @@
1
- from .bgpstreamconfig import BGPStreamConfig, FilterOptions, PyBGPKITStreamConfig
1
+ from .bgpstreamconfig import (
2
+ BGPStreamConfig,
3
+ FilterOptions,
4
+ PyBGPKITStreamConfig,
5
+ LiveStreamConfig,
6
+ )
2
7
  from .bgpkitstream import BGPKITStream
8
+ from .bgpelement import BGPElement
3
9
 
4
- __all__ = ["BGPStreamConfig", "FilterOptions", "BGPKITStream", "PyBGPKITStreamConfig"]
10
+ __all__ = [
11
+ "BGPStreamConfig",
12
+ "FilterOptions",
13
+ "BGPKITStream",
14
+ "PyBGPKITStreamConfig",
15
+ "BGPElement",
16
+ "LiveStreamConfig",
17
+ ]
@@ -14,9 +14,9 @@ class ElementFields(TypedDict):
14
14
  class BGPElement(NamedTuple):
15
15
  """Compatible with pybgpstream.BGPElem"""
16
16
 
17
+ time: float # time first for sorting tuples convention
17
18
  type: str
18
19
  collector: str
19
- time: float
20
20
  peer_asn: int
21
21
  peer_address: str
22
22
  fields: ElementFields
@@ -31,7 +31,9 @@ class BGPElement(NamedTuple):
31
31
  self.peer_address,
32
32
  self._maybe_field("prefix"),
33
33
  self._maybe_field("next-hop"),
34
- self._maybe_field("as-path"),
34
+ " ".join(map(str, self.fields["as-path"]))
35
+ if "as-path" in self.fields
36
+ else None,
35
37
  " ".join(self.fields["communities"])
36
38
  if "communities" in self.fields
37
39
  else None,
@@ -42,3 +44,10 @@ class BGPElement(NamedTuple):
42
44
  def _maybe_field(self, field):
43
45
  """Credit to pybgpstream"""
44
46
  return self.fields[field] if field in self.fields else None
47
+
48
+ # Useful for sorting streams
49
def __lt__(self, other):
    """Return True when this element's timestamp is strictly earlier.

    Only `time` is compared, so heaps and merges can order elements without
    touching the (unorderable) `fields` dict. Any object exposing a `time`
    attribute is accepted; for anything else NotImplemented is returned so
    Python can try the reflected comparison or raise TypeError.
    """
    try:
        return self.time < other.time
    except AttributeError:
        return NotImplemented

def __le__(self, other):
    """Return True when this element's timestamp is earlier or equal.

    Same contract as `__lt__`: timestamp-only comparison, NotImplemented for
    operands without a `time` attribute.
    """
    try:
        return self.time <= other.time
    except AttributeError:
        return NotImplemented
@@ -19,6 +19,7 @@ from pybgpkitstream.bgpstreamconfig import (
19
19
  BGPStreamConfig,
20
20
  FilterOptions,
21
21
  PyBGPKITStreamConfig,
22
+ LiveStreamConfig,
22
23
  )
23
24
  from pybgpkitstream.bgpelement import BGPElement
24
25
  from pybgpkitstream.bgpparser import (
@@ -28,6 +29,7 @@ from pybgpkitstream.bgpparser import (
28
29
  PyBGPStreamParser,
29
30
  BGPdumpParser,
30
31
  )
32
+ from pybgpkitstream.rislive import RISLiveStream, jitter_buffer_stream
31
33
  from pybgpkitstream.utils import dt_from_filepath
32
34
 
33
35
  name2parser = {
@@ -41,23 +43,6 @@ name2parser = {
41
43
  logger = logging.getLogger(__name__)
42
44
 
43
45
 
44
- def convert_bgpkit_elem(element, is_rib: bool, collector: str) -> BGPElement:
45
- """Convert pybgpkit element to pybgpstream-like element"""
46
- return BGPElement(
47
- type="R" if is_rib else element.elem_type,
48
- collector=collector,
49
- time=element.timestamp,
50
- peer_asn=element.peer_asn,
51
- peer_address=element.peer_ip,
52
- fields={
53
- "next-hop": element.next_hop,
54
- "as-path": element.as_path,
55
- "communities": [] if not element.communities else element.communities,
56
- "prefix": element.prefix,
57
- },
58
- )
59
-
60
-
61
46
  def crc32(input_str: str):
62
47
  input_bytes = input_str.encode("utf-8")
63
48
  crc = binascii.crc32(input_bytes) & 0xFFFFFFFF
@@ -87,21 +72,22 @@ def get_shared_memory():
87
72
  class BGPKITStream:
88
73
  def __init__(
89
74
  self,
90
- ts_start: float,
91
- ts_end: float,
92
- collector_id: str,
75
+ collectors: list[str],
93
76
  data_type: list[Literal["update", "rib"]],
94
- filters: FilterOptions | None,
77
+ ts_start: float = None,
78
+ ts_end: float = None,
79
+ filters: FilterOptions | None = None,
95
80
  cache_dir: str | None = None,
96
81
  max_concurrent_downloads: int | None = 10,
97
82
  chunk_time: float | None = datetime.timedelta(hours=2).seconds,
98
83
  ram_fetch: bool | None = True,
99
84
  parser_name: str | None = "pybgpkit",
85
+ jitter_buffer_delay: float | None = 10.0,
100
86
  ):
101
87
  # Stream config
102
88
  self.ts_start = ts_start
103
89
  self.ts_end = ts_end
104
- self.collector_id = collector_id
90
+ self.collectors = collectors
105
91
  self.data_type = data_type
106
92
  if not filters:
107
93
  filters = FilterOptions()
@@ -126,6 +112,9 @@ class BGPKITStream:
126
112
  self.broker = bgpkit.Broker()
127
113
  self.parser_cls: BGPParser = name2parser[parser_name]
128
114
 
115
+ # Live config
116
+ self.jitter_buffer_delay = jitter_buffer_delay
117
+
129
118
  @staticmethod
130
119
  def _generate_cache_filename(url):
131
120
  """Generate a cache filename compatible with BGPKIT parser."""
@@ -163,7 +152,7 @@ class BGPKITStream:
163
152
  items: list[BrokerItem] = self.broker.query(
164
153
  ts_start=int(self.ts_start - 60),
165
154
  ts_end=int(self.ts_end),
166
- collector_id=self.collector_id,
155
+ collector_id=",".join(self.collectors),
167
156
  data_type=data_type,
168
157
  )
169
158
  for item in items:
@@ -226,6 +215,8 @@ class BGPKITStream:
226
215
  logging.info("All downloads finished.")
227
216
 
228
217
  def __iter__(self):
218
+ if self.ts_start is None and self.ts_end is None:
219
+ return self._iter_live()
229
220
  if "update" in self.data_type:
230
221
  return self._iter_update()
231
222
  else:
@@ -250,7 +241,7 @@ class BGPKITStream:
250
241
  ts_start=current,
251
242
  ts_end=chunk_end
252
243
  - 1, # remove one second because BGPKIT include border
253
- collector_id=self.collector_id,
244
+ collectors=self.collectors,
254
245
  data_type=self.data_type,
255
246
  cache_dir=self.cache_dir.name
256
247
  if isinstance(self.cache_dir, Directory)
@@ -319,7 +310,7 @@ class BGPKITStream:
319
310
  ts_start=current,
320
311
  ts_end=chunk_end
321
312
  - 1, # remove one second because BGPKIT include border
322
- collector_id=self.collector_id,
313
+ collectors=self.collectors,
323
314
  data_type=self.data_type,
324
315
  cache_dir=self.cache_dir.name
325
316
  if isinstance(self.cache_dir, Directory)
@@ -365,14 +356,30 @@ class BGPKITStream:
365
356
  finally:
366
357
  self.cache_dir.cleanup()
367
358
 
359
def _iter_live(self) -> Iterator[BGPElement]:
    """Yield BGP elements from RIS Live for the configured collectors.

    Only collectors whose name starts with "rrc" are subscribed to; the
    others are reported and ignored. When `jitter_buffer_delay` is a
    positive number the stream is wrapped in `jitter_buffer_stream` so that
    elements come out time-sorted (at the cost of that delay).

    Raises:
        ValueError: if none of the configured collectors is a RIS collector
            (otherwise nothing would be subscribed and iteration would block
            forever without producing anything).
    """
    ris_collectors = [
        collector for collector in self.collectors if collector.startswith("rrc")
    ]

    ignored = [
        collector for collector in self.collectors if collector not in ris_collectors
    ]
    if ignored:
        logger.warning(
            "Live mode only supports RIS collectors, ignoring: %s", ", ".join(ignored)
        )
    if not ris_collectors:
        raise ValueError(
            "Live mode requires at least one RIS collector (name starting with 'rrc')"
        )

    stream = RISLiveStream(collectors=ris_collectors, filters=self.filters)

    delay = self.jitter_buffer_delay
    if delay is not None and delay > 0:
        stream = jitter_buffer_stream(stream, buffer_delay=delay)

    yield from stream
372
+
368
373
  @classmethod
369
- def from_config(cls, config: PyBGPKITStreamConfig | BGPStreamConfig):
374
+ def from_config(
375
+ cls, config: PyBGPKITStreamConfig | BGPStreamConfig | LiveStreamConfig
376
+ ):
370
377
  if isinstance(config, PyBGPKITStreamConfig):
371
378
  stream_config = config.bgpstream_config
372
379
  return cls(
373
380
  ts_start=stream_config.start_time.timestamp(),
374
381
  ts_end=stream_config.end_time.timestamp(),
375
- collector_id=",".join(stream_config.collectors),
382
+ collectors=stream_config.collectors,
376
383
  data_type=[dtype[:-1] for dtype in stream_config.data_types],
377
384
  filters=stream_config.filters
378
385
  if stream_config.filters
@@ -387,10 +394,29 @@ class BGPKITStream:
387
394
  )
388
395
 
389
396
  elif isinstance(config, BGPStreamConfig):
397
+ if not config.is_live():
398
+ return cls(
399
+ ts_start=config.start_time.timestamp(),
400
+ ts_end=config.end_time.timestamp(),
401
+ collectors=config.collectors,
402
+ data_type=[dtype[:-1] for dtype in config.data_types],
403
+ filters=config.filters if config.filters else FilterOptions(),
404
+ )
405
+ else:
406
+ return cls(
407
+ collectors=config.collectors,
408
+ data_type=["update"],
409
+ filters=config.filters if config.filters else FilterOptions(),
410
+ jitter_buffer_delay=10,
411
+ )
412
+
413
+ elif isinstance(config, LiveStreamConfig):
390
414
  return cls(
391
- ts_start=config.start_time.timestamp(),
392
- ts_end=config.end_time.timestamp(),
393
- collector_id=",".join(config.collectors),
394
- data_type=[dtype[:-1] for dtype in config.data_types],
415
+ collectors=config.collectors,
416
+ data_type=["update"],
395
417
  filters=config.filters if config.filters else FilterOptions(),
418
+ jitter_buffer_delay=config.jitter_buffer_delay,
396
419
  )
420
+
421
+ else:
422
+ raise ValueError("Unsupported config type")
@@ -6,6 +6,7 @@ import re
6
6
  import ipaddress
7
7
  import subprocess as sp
8
8
  from pybgpkitstream.utils import dt_from_filepath
9
+ import logging
9
10
 
10
11
  try:
11
12
  import pybgpstream
@@ -36,12 +37,21 @@ class PyBGPKITParser(BGPParser):
36
37
  self.parser = None # placeholder for lazy instantiation
37
38
  self.is_rib = is_rib
38
39
  self.collector = collector
39
- self.filters = filters.model_dump(exclude_unset=True)
40
+ self.filters: dict = filters.model_dump(exclude_unset=True, exclude_none=True)
40
41
  # cast int ipv to pybgpkit ipv4 or ipv6 string
41
42
  if "ip_version" in self.filters:
42
43
  ipv_int = self.filters["ip_version"]
43
44
  if ipv_int:
44
45
  self.filters["ip_version"] = f"ipv{ipv_int}"
46
+ if self.filters.get("peer_asn"):
47
+ self.filters["peer_asn"] = str(self.filters["peer_asn"])
48
+ if self.filters.get("origin_asn"):
49
+ self.filters["origin_asn"] = str(self.filters["origin_asn"])
50
+ if self.filters.get("update_type"):
51
+ val = self.filters.pop("update_type")
52
+ self.filters["type"] = val
53
+ if self.filters.get("peer_ips"):
54
+ self.filters["peer_ips"] = ", ".join(self.filters["peer_ips"])
45
55
 
46
56
  def _convert(self, element) -> BGPElement:
47
57
  return BGPElement(
@@ -105,9 +115,9 @@ class BGPKITParser(BGPParser):
105
115
  # Structure: Type|Time|PeerIP|PeerAS|Prefix
106
116
  if rec_type == "W":
107
117
  return BGPElement(
118
+ time=self.time, # force RIB filename timestamp instead of last changed
108
119
  type="W",
109
120
  collector=self.collector,
110
- time=self.time, # force RIB filename timestamp instead of last changed
111
121
  peer_asn=int(element[3]),
112
122
  peer_address=element[2],
113
123
  fields={"prefix": element[4]},
@@ -123,11 +133,10 @@ class BGPKITParser(BGPParser):
123
133
 
124
134
  return BGPElement(
125
135
  # bgpkit outputs 'A' for both Updates and RIB entries.
126
- # Map to "A" (Announcement) or change to "R" if you strictly need RIB typing.
136
+ self.time,
127
137
  "R" if self.is_rib else rec_type,
128
138
  self.collector,
129
139
  # float(element[1]),
130
- self.time,
131
140
  int(element[3]),
132
141
  element[2],
133
142
  {
@@ -154,16 +163,44 @@ class PyBGPStreamParser(BGPParser):
154
163
  ):
155
164
  self.filepath = filepath
156
165
  self.collector = collector
157
- self.filters = generate_bgpstream_filters(filters) if filters else None
166
+ self.filters = filters
158
167
 
159
- def __iter__(self):
160
- stream = pybgpstream.BGPStream(data_interface="singlefile", filter=self.filters)
168
def _iter_normal(self):
    """Iterate the file with pybgpstream doing all the filtering itself.

    Used when there is no filter at all, or when every requested filter can
    be expressed in the pybgpstream filter string.
    """
    filter_string = None
    if self.filters:
        filter_string = generate_bgpstream_filters(self.filters)

    reader = pybgpstream.BGPStream(
        data_interface="singlefile",
        filter=filter_string,
    )
    reader.set_data_interface_option("singlefile", "rib-file", self.filepath)

    for record in reader:
        # Stamp every element with the collector this parser was created for.
        record.collector = self.collector
        yield record
166
179
 
180
def _iter_python_filter(self):
    """Iterate the file and apply the peer IP filter on the Python side.

    pybgpstream's filter string cannot express `peer_ip` / `peer_ips`, so
    the remaining filters are handed to pybgpstream and elements whose
    `peer_address` is not in the wanted set are dropped here.
    When both `peer_ip` and `peer_ips` are set, `peer_ip` takes precedence.
    """
    bgpstream_filter = generate_bgpstream_filters(self.filters)
    stream = pybgpstream.BGPStream(
        data_interface="singlefile",
        filter=bgpstream_filter if bgpstream_filter else None,
    )
    stream.set_data_interface_option("singlefile", "rib-file", self.filepath)

    # Build the lookup set locally instead of writing back into self.filters:
    # the FilterOptions object belongs to the caller and must not be altered.
    if self.filters.peer_ip:
        peer_ips = {self.filters.peer_ip}
    else:
        peer_ips = set(self.filters.peer_ips)

    for elem in stream:
        if elem.peer_address not in peer_ips:
            continue
        elem.collector = self.collector
        yield elem

def __iter__(self):
    """Return the element iterator matching the configured filters.

    Without filters, or without any peer IP filter, pybgpstream handles
    everything (`_iter_normal`); otherwise peer IP filtering is done in
    Python (`_iter_python_filter`).
    """
    filters = self.filters
    # filters may be None (no filtering requested at all)
    if filters is None or not (filters.peer_ip or filters.peer_ips):
        return self._iter_normal()
    return self._iter_python_filter()
203
+
167
204
 
168
205
  class BGPdumpParser(BGPParser):
169
206
  """Run bgpdump as a subprocess. I might have over-engineered the filtering."""
@@ -204,9 +241,9 @@ class BGPdumpParser(BGPParser):
204
241
  # 1. Handle Withdrawals (Fastest path, fewer fields)
205
242
  if elem_type == "W":
206
243
  return BGPElement(
244
+ float(element[1]),
207
245
  "W",
208
246
  self.collector,
209
- float(element[1]),
210
247
  int(element[4]),
211
248
  element[3],
212
249
  {"prefix": element[5]}, # Dict literal is faster than assignment
@@ -219,9 +256,9 @@ class BGPdumpParser(BGPParser):
219
256
  # Logic: if TABLE_DUMP2, type is R, else A
220
257
  # Construct fields dict in one shot (BUILD_MAP opcode)
221
258
  return BGPElement(
259
+ float(element[1]),
222
260
  "R" if elem_type == "B" else "A",
223
261
  self.collector,
224
- float(element[1]),
225
262
  int(element[4]),
226
263
  element[3],
227
264
  {
@@ -229,7 +266,7 @@ class BGPdumpParser(BGPParser):
229
266
  "as-path": element[6],
230
267
  "next-hop": element[8],
231
268
  # Check for empty string before splitting (avoids creating [''])
232
- "communities": rec_comm.split() if rec_comm else [],
269
+ "communities": rec_comm.split(" ") if rec_comm else [],
233
270
  },
234
271
  )
235
272
 
@@ -362,12 +399,12 @@ def generate_bgpstream_filters(f: FilterOptions) -> str | None:
362
399
  parts.append(f"prefix any {f.prefix_super_sub}")
363
400
 
364
401
  if f.ip_version:
365
- parts.append(f"ipversion {f.ip_version[-1]}")
402
+ parts.append(f"ipversion {f.ip_version}")
366
403
 
367
404
  # Warn about unsupported fields
368
405
  if f.peer_ip or f.peer_ips:
369
- print(
370
- "Warning: peer_ip and peer_ips are not supported by this BGPStream filter string parser and will be ignored."
406
+ logging.info(
407
+ "Filtering by peer_ip is not supported natively by pybgpstream (falling back to python-side filtering)"
371
408
  )
372
409
 
373
410
  # Join all parts with 'and' as required by the parser
@@ -48,20 +48,27 @@ class FilterOptions(BaseModel):
48
48
 
49
49
 
50
50
  class BGPStreamConfig(BaseModel):
51
- """Unified BGPStream config, compatible with BGPKIT and pybgpstream"""
51
+ """Unified BGPStream config"""
52
52
 
53
- start_time: datetime.datetime = Field(description="Start of the stream")
54
- end_time: datetime.datetime = Field(description="End of the stream")
53
+ start_time: datetime.datetime | None = Field(
54
+ default=None, description="Start of the stream"
55
+ )
56
+ end_time: datetime.datetime | None = Field(
57
+ default=None, description="End of the stream"
58
+ )
55
59
  collectors: list[str] = Field(description="List of collectors to get data from")
56
- data_types: list[Literal["ribs", "updates"]] = Field(
57
- description="List of archives files to consider (`ribs` or `updates`)"
60
+ data_types: list[Literal["ribs", "updates"]] | None = Field(
61
+ default=["updates"],
62
+ description="List of archives files to consider (`ribs` or `updates`)",
58
63
  )
59
64
 
60
65
  filters: FilterOptions | None = Field(default=None, description="Optional filters")
61
66
 
62
- @field_validator("start_time", "end_time")
67
+ @field_validator("start_time", "end_time", mode="before")
63
68
  @classmethod
64
69
  def normalize_to_utc(cls, dt: datetime.datetime) -> datetime.datetime:
70
+ if dt is None:
71
+ return None
65
72
  # if naive datetime (not timezone-aware) assume it's UTC
66
73
  if dt.tzinfo is None:
67
74
  return dt.replace(tzinfo=datetime.timezone.utc)
@@ -69,6 +76,38 @@ class BGPStreamConfig(BaseModel):
69
76
  else:
70
77
  return dt.astimezone(datetime.timezone.utc)
71
78
 
79
@model_validator(mode="after")
def validate(self) -> "BGPStreamConfig":
    """Check the time bounds and fill in the live-mode default data type.

    Both `start_time` and `end_time` must be given (historical mode) or both
    left as None (live mode). In historical mode the start must be strictly
    before the end. In live mode a missing `data_types` is set to
    ["updates"]; an explicit value is left untouched.

    Raises:
        ValueError: if only one bound is provided, or if the start is not
            strictly before the end.
    """
    if (self.start_time is None) ^ (self.end_time is None):
        raise ValueError(
            "Provide both start and end times, or leave both as None for live mode."
        )

    if not self.is_live():
        # Explicit raise rather than assert: asserts are stripped under -O.
        if not self.start_time < self.end_time:
            raise ValueError("start_time must be strictly before end_time")
    elif self.data_types is None:
        # Live mode only has updates; use them when nothing was specified.
        self.data_types = ["updates"]

    return self

def is_live(self) -> bool:
    """Return True when neither a start nor an end time is configured."""
    return self.start_time is None and self.end_time is None
97
+
98
+
99
class LiveStreamConfig(BaseModel):
    """Config for live mode"""

    # Collectors to subscribe to (required).
    collectors: list[str] = Field(
        description="List of collectors to get data from (for now only RIS live collectors)",
    )

    # Optional element filters; None means no filtering.
    filters: FilterOptions | None = Field(
        description="Optional filters",
        default=None,
    )

    # Re-ordering window in seconds; None or 0 disables the jitter buffer.
    jitter_buffer_delay: float | None = Field(
        description="Jitter buffer time in seconds to make sure RIS live updates are time-sorted. Introduce a slight delay. Set to None or 0 to disable",
        default=10.0,
    )
110
+
72
111
 
73
112
  class PyBGPKITStreamConfig(BaseModel):
74
113
  """Unified BGPStream config and parameters related to PyBGPKIT implementation (all optional)"""
@@ -135,26 +174,31 @@ class PyBGPKITStreamConfig(BaseModel):
135
174
  raise ValueError(
136
175
  "bgpkit binary not found in PATH. "
137
176
  "Install from: https://github.com/bgpkit/bgpkit-parser "
138
- "or use cargo: cargo install bgpkit-parser"
177
+ "or use cargo: cargo install bgpkit-parser --features cli"
139
178
  )
140
179
 
141
- # Return the parser value if validation passes
142
180
  return parser
143
-
144
@model_validator(mode="before")
@classmethod
def nest_bgpstream_params(cls, data: dict) -> dict:
    """Allow to define a flat config.

    Stream-level keys (`start_time`, `end_time`, `collectors`, `data_types`,
    `filters`) given at the top level are moved under `bgpstream_config`.
    Input that already has `bgpstream_config`, or that is not a dict, is
    returned as is. The caller's dict is never modified: a new dict is built.
    """
    # Nothing to nest: not a mapping we can inspect, or already nested.
    if not isinstance(data, dict) or "bgpstream_config" in data:
        return data

    # Fields that belong to the inner BGPStreamConfig
    stream_fields = {
        "start_time",
        "end_time",
        "collectors",
        "data_types",
        "filters",
    }

    nested = {k: v for k, v in data.items() if k not in stream_fields}
    nested["bgpstream_config"] = {
        k: v for k, v in data.items() if k in stream_fields
    }
    return nested
pybgpkitstream/cli.py CHANGED
@@ -2,8 +2,12 @@ import argparse
2
2
  import sys
3
3
  import datetime
4
4
 
5
- from pybgpkitstream import BGPStreamConfig, FilterOptions
6
- from pybgpkitstream import BGPKITStream
5
+ from pybgpkitstream import (
6
+ BGPStreamConfig,
7
+ FilterOptions,
8
+ PyBGPKITStreamConfig,
9
+ BGPKITStream,
10
+ )
7
11
 
8
12
 
9
13
  def main():
@@ -40,12 +44,6 @@ def main():
40
44
  default=["updates"],
41
45
  help="List of archives to consider ('ribs' or 'updates').",
42
46
  )
43
- parser.add_argument(
44
- "--cache-dir",
45
- type=str,
46
- default=None,
47
- help="Directory for caching downloaded files.",
48
- )
49
47
 
50
48
  # Arguments for FilterOptions
51
49
  parser.add_argument(
@@ -93,7 +91,7 @@ def main():
93
91
  )
94
92
  parser.add_argument(
95
93
  "--peer-asn",
96
- type=str,
94
+ type=int,
97
95
  default=None,
98
96
  help="Filter by the AS number of the BGP peer.",
99
97
  )
@@ -111,6 +109,20 @@ def main():
111
109
  help="Filter by a regular expression matching the AS path.",
112
110
  )
113
111
 
112
+ # PyBGPKITStream implementation parameters
113
+ parser.add_argument(
114
+ "--cache-dir",
115
+ type=str,
116
+ default=None,
117
+ help="Directory for caching downloaded files.",
118
+ )
119
+ parser.add_argument(
120
+ "--parser",
121
+ type=str,
122
+ choices=["pybgpkit", "bgpkit", "pybgpstream", "bgpdump"],
123
+ default="pybgpkit",
124
+ )
125
+
114
126
  args = parser.parse_args()
115
127
 
116
128
  filter_options = FilterOptions(
@@ -130,15 +142,18 @@ def main():
130
142
  if all(value is None for value in filter_options.model_dump().values()):
131
143
  filter_options = None
132
144
 
133
- config = BGPStreamConfig(
145
+ bgpstream_config = BGPStreamConfig(
134
146
  start_time=args.start_time,
135
147
  end_time=args.end_time,
136
148
  collectors=args.collectors,
137
149
  data_types=args.data_types,
138
- cache_dir=args.cache_dir,
139
150
  filters=filter_options,
140
151
  )
141
152
 
153
+ config = PyBGPKITStreamConfig(
154
+ bgpstream_config=bgpstream_config, cache_dir=args.cache_dir, parser=args.parser
155
+ )
156
+
142
157
  try:
143
158
  for element in BGPKITStream.from_config(config):
144
159
  print(element)
@@ -0,0 +1,141 @@
1
+ from typing import Iterator
2
+ import json
3
+ import heapq
4
+ import websocket
5
+
6
+ from pybgpkitstream.bgpelement import BGPElement
7
+ from pybgpkitstream.bgpstreamconfig import FilterOptions
8
+
9
+
10
def ris_message2bgpelem(ris_message: dict) -> Iterator[BGPElement]:
    """Convert one RIS Live UPDATE payload into BGPElement objects.

    One "W" element is yielded per withdrawn prefix, then one "A" element
    per announced prefix. The collector name is the part of `host` before
    the first dot, communities are rendered as "asn:value" strings and only
    the first next hop of a comma-separated list is kept.

    `timestamp`, `host`, `peer_asn` and `peer` are required. `path`,
    `community`, `withdrawals` and `announcements` are treated as optional
    and default to empty, so a message lacking one of them does not raise
    KeyError.
    """
    timestamp = float(ris_message["timestamp"])
    collector = ris_message["host"].split(".")[0]
    peer_asn = int(ris_message["peer_asn"])
    peer_address = ris_message["peer"]
    path = ris_message.get("path") or []
    communities = [
        f"{asn}:{community}"
        for asn, community in ris_message.get("community") or []
    ]

    for pfx in ris_message.get("withdrawals") or []:
        yield BGPElement(
            type="W",
            collector=collector,
            time=timestamp,
            peer_asn=peer_asn,
            peer_address=peer_address,
            fields={
                "as-path": path,
                "communities": communities,
                "prefix": pfx,
            },
        )

    for announcement in ris_message.get("announcements") or []:
        for pfx in announcement["prefixes"]:
            yield BGPElement(
                type="A",
                collector=collector,
                time=timestamp,
                peer_asn=peer_asn,
                peer_address=peer_address,
                fields={
                    "next-hop": announcement["next_hop"].split(",")[0],
                    "as-path": path,
                    "communities": communities,
                    "prefix": pfx,
                },
            )
50
+
51
+
52
class RISLiveStream:
    """Iterable of BGP elements received from the RIS Live websocket.

    One websocket connection is opened per iteration and a subscription is
    sent for each collector, all sharing the same converted filters.
    """

    def __init__(
        self,
        collectors: list[str],
        client="pybgpkitstream",
        filters: "FilterOptions | None" = None,
    ):
        """Store the collectors and client name, and convert the filters.

        Args:
            collectors: RIS collector names, used as the `host` of each
                subscription.
            client: value of the `client` query parameter of the websocket URL.
            filters: optional FilterOptions, converted once to RIS Live
                subscription parameters.
        """
        self.collectors = collectors
        self.client = client
        self.filters = self._convert_filter_options(filters)

    @staticmethod
    def _convert_filter_options(f: "FilterOptions | None") -> dict:
        """Convert FilterOptions to RIS live filters.

        Returns an empty dict when `f` is None or has no explicitly set
        field. Only keys with something to filter on are emitted.
        """
        if f is None:
            return {}

        if not f.model_dump(exclude_unset=True):
            return {}

        res = {}
        if f.update_type == "withdraw":
            res["require"] = "withdrawals"
        elif f.update_type == "announce":
            res["require"] = "announcements"
        if f.peer_ip:
            res["peer"] = f.peer_ip

        path_elements = []
        if f.peer_asn:
            path_elements.append(f"^{f.peer_asn}")
        if f.origin_asn:
            path_elements.append(f"{f.origin_asn}$")
        # Only send a path filter when there is one: an empty string is not
        # a meaningful pattern.
        # NOTE(review): with both ASNs set this yields "^peer,origin$", which
        # presumably matches only two-hop paths -- confirm against RIS Live.
        if path_elements:
            res["path"] = ",".join(path_elements)

        if f.prefix:
            res["prefix"] = f.prefix
            # default is True which I think is not consistent with BGPKIT/BGPStream
            res["moreSpecific"] = False
        if f.prefix_sub:
            res["prefix"] = f.prefix_sub
            res["moreSpecific"] = True
        if f.prefix_super:
            res["prefix"] = f.prefix_super
            res["lessSpecific"] = True
        if f.prefix_super_sub:
            res["prefix"] = f.prefix_super_sub
            res["moreSpecific"] = True
            res["lessSpecific"] = True

        return res

    def __iter__(self) -> "Iterator[BGPElement]":
        """Connect, subscribe to every collector and yield elements forever.

        Frames that are not of type "ris_message" are skipped, except
        "ris_error" which is surfaced as an exception. The websocket is
        closed when iteration stops for any reason.

        Raises:
            RuntimeError: when the server sends a "ris_error" message.
        """
        ws = websocket.WebSocket()
        ws.connect(f"wss://ris-live.ripe.net/v1/ws/?client={self.client}")

        try:
            # Subscribe to each collector on the same connection
            for collector in self.collectors:
                params = {"host": collector, "type": "UPDATE"} | self.filters
                ws.send(json.dumps({"type": "ris_subscribe", "data": params}))

            for data in ws:
                if not data:
                    continue
                message = json.loads(data)
                msg_type = message.get("type")
                if msg_type == "ris_error":
                    raise RuntimeError(f"RIS Live error: {message.get('data')}")
                if msg_type != "ris_message":
                    continue
                yield from ris_message2bgpelem(message["data"])
        finally:
            # Release the connection even if the consumer stops early.
            ws.close()
119
+
120
+
121
def jitter_buffer_stream(stream, buffer_delay=10) -> "Iterator[BGPElement]":
    """
    Produces an ordered stream by buffering elements for `buffer_delay` seconds.

    Elements are held in a min-heap keyed on their `time` attribute and are
    released once they are more than `buffer_delay` seconds older than the
    newest timestamp seen so far. Elements sharing a timestamp are released
    in arrival order. Whatever remains buffered is flushed, in order, when
    the input stream ends.

    Args:
        stream: iterable of objects exposing a numeric `time` attribute.
        buffer_delay: size of the re-ordering window, in seconds of stream
            time (not wall-clock time).
    """
    heap = []
    max_ts_seen = float("-inf")

    # The arrival index is the tie-breaker: heapq is not stable, and without
    # it same-timestamp elements could come out in a different order than
    # they arrived. It also means elements themselves are never compared.
    for seq, elem in enumerate(stream):
        ts = elem.time
        # Track the latest timestamp seen in the jittery stream
        if ts > max_ts_seen:
            max_ts_seen = ts

        heapq.heappush(heap, (ts, seq, elem))

        # Flush from buffer if timestamp is old enough
        while heap and (max_ts_seen - heap[0][0]) > buffer_delay:
            yield heapq.heappop(heap)[2]

    # Clean up when stream ends (never hopefully)
    while heap:
        yield heapq.heappop(heap)[2]
pybgpkitstream/utils.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import datetime
2
2
  import re
3
3
 
4
+
4
5
  def dt_from_filepath(filepath: str, pattern=r"(\d{8}\.\d{4})") -> datetime.datetime:
5
6
  match = re.search(pattern, filepath)
6
7
  if not match:
@@ -8,4 +9,4 @@ def dt_from_filepath(filepath: str, pattern=r"(\d{8}\.\d{4})") -> datetime.datet
8
9
  timestamp_str = match.group(1)
9
10
  dt = datetime.datetime.strptime(timestamp_str, "%Y%m%d.%H%M")
10
11
  dt = dt.replace(tzinfo=datetime.timezone.utc)
11
- return dt
12
+ return dt
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.3
2
+ Name: pybgpkitstream
3
+ Version: 0.4.0
4
+ Summary: Drop-in replacement for PyBGPStream using BGPKIT
5
+ Author: JustinLoye
6
+ Author-email: JustinLoye <jloye@iij.ad.jp>
7
+ Requires-Dist: aiohttp>=3.12.15
8
+ Requires-Dist: pybgpkit>=0.6.2
9
+ Requires-Dist: pydantic>=2.11.9
10
+ Requires-Dist: websocket-client>=1.8.0
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+
14
+ # PyBGPKITStream
15
+
16
+ A drop-in replacement for PyBGPStream using BGPKIT
17
+
18
+ ## Features
19
+
20
+ - Generates time-ordered BGP messages on the fly from the RIB and update MRT files of multiple collectors
21
+ - Streams the same BGP messages as PyBGPStream, enabling seamless, drop-in replacement
22
+ - Lazy loading consumes minimal memory, making it suitable for large datasets
23
+ - Multiple BGP parsers supported: `pybgpkit` (default but slow), `bgpkit-parser`, `bgpdump` and `pybgpstream` single file backend (the latter three are system dependencies)
24
+ - Caching with concurrent downloading fully compatible with the BGPKIT parser's caching functionality.
25
+ - Performance: for updates, typically 3–10× faster than PyBGPStream; for RIB-only processing, currently about 3–4× slower (see [perf.md](perf.md) for test details).
26
+ - A CLI tool
27
+
28
+ ## Quick start
29
+
30
+ Installation:
31
+
32
+ ```sh
33
+ pip install pybgpkitstream
34
+ ```
35
+
36
+ Usage:
37
+
38
+ ```python
39
+ import datetime
40
+ from pybgpkitstream import BGPStreamConfig, BGPKITStream
41
+
42
+ config = BGPStreamConfig(
43
+ start_time=datetime.datetime(2010, 9, 1, 0, 0),
44
+ end_time=datetime.datetime(2010, 9, 1, 1, 59),
45
+ collectors=["route-views.sydney", "route-views.wide"],
46
+ data_types=["ribs", "updates"],
47
+ )
48
+
49
+ stream = BGPKITStream.from_config(config)
50
+
51
+ n_elems = 0
52
+ for elem in stream:
53
+ n_elems += 1
54
+
55
+ print(f"Processed {n_elems} BGP elements")
56
+ ```
57
+
58
+ or in the terminal:
59
+
60
+ ```sh
61
+ pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
62
+ ```
63
+
64
+ ## Motivation
65
+
66
+ PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
67
+ BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
68
+
69
+ ## Missing features
70
+
71
+ - Live mode for RouteViews collectors
72
+ - Some PyBGPStream data interface options like csv or sqlite
@@ -0,0 +1,13 @@
1
+ pybgpkitstream/__init__.py,sha256=_i1D2bx1SanDe8BZ8B4S1349an7-1JW4ptsY5T8gig4,344
2
+ pybgpkitstream/bgpelement.py,sha256=Kr4YLk14vRZtQeYQv_k-NcODmgP0MHdXaIlUzeRERjk,1466
3
+ pybgpkitstream/bgpkitstream.py,sha256=Bw7V4RYg7Bmkkl2XG4LPiFZGEECOkijcd59EUxsBBbQ,16031
4
+ pybgpkitstream/bgpparser.py,sha256=uQJmbaRQOXsCkTnTW8Lhi336cLrcMWCaoAyNe4ZE1B0,16177
5
+ pybgpkitstream/bgpstreamconfig.py,sha256=PaXMREUFb6zj9Y4Zpnlb1nekV8zGYIyEtvSp99hDuic,7480
6
+ pybgpkitstream/cli.py,sha256=U1jFjEwkuySk7OhUR2sYWkiYFEx2XDLGsSwZM70zGcE,4548
7
+ pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ pybgpkitstream/rislive.py,sha256=e0o8qF6nT_hJnDbU7f8ZuPdTwEnO0H3lHlJMqp3rqlU,4437
9
+ pybgpkitstream/utils.py,sha256=BzPUKRdJ48bP73eT4LWByrmN0IIIUdBYuATINMXWpTE,406
10
+ pybgpkitstream-0.4.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
11
+ pybgpkitstream-0.4.0.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
12
+ pybgpkitstream-0.4.0.dist-info/METADATA,sha256=F4sExUM0TL01r8MmV6bftKQWrkH93i18TyreJDwuIuM,2320
13
+ pybgpkitstream-0.4.0.dist-info/RECORD,,
@@ -1,79 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: pybgpkitstream
3
- Version: 0.2.0
4
- Summary: Drop-in replacement for PyBGPStream using BGPKIT
5
- Author: JustinLoye
6
- Author-email: JustinLoye <jloye@iij.ad.jp>
7
- Requires-Dist: aiohttp>=3.12.15
8
- Requires-Dist: pybgpkit>=0.6.2
9
- Requires-Dist: pydantic>=2.11.9
10
- Requires-Python: >=3.10
11
- Description-Content-Type: text/markdown
12
-
13
- # PyBGPKITStream
14
-
15
- A drop-in replacement for PyBGPStream using BGPKIT
16
-
17
- ## Features
18
-
19
- - Effortless BGP Stream Switching:
20
- - Seamless, drop-in replacement ([example](tests/test_stream.py#L38))
21
- - Lazy message generation: generates time-ordered BGP messages on the fly, consuming minimal memory and making it suitable for large datasets
22
- - Supports multiple route collectors
23
- - Supports both ribs and updates
24
- - Caching with concurrent downloading is enabled and is fully compatible with the BGPKIT parser's caching functionality.
25
- - [Similar performance to PyBGPStream](examples/perf.ipynb)
26
- - A CLI tool
27
-
28
- ## Quick start
29
-
30
- Installation:
31
-
32
- ```sh
33
- pip install pybgpkitstream
34
- ```
35
-
36
- Usage:
37
-
38
- ```python
39
- import datetime
40
- from pybgpkitstream import BGPStreamConfig, BGPKITStream
41
-
42
- config = BGPStreamConfig(
43
- start_time=datetime.datetime(2010, 9, 1, 0, 0),
44
- end_time=datetime.datetime(2010, 9, 1, 1, 59),
45
- collectors=["route-views.sydney", "route-views.wide"],
46
- data_types=["ribs", "updates"],
47
- )
48
-
49
- stream = BGPKITStream.from_config(config)
50
-
51
- n_elems = 0
52
- for _ in stream:
53
- n_elems += 1
54
-
55
- print(f"Processed {n_elems} BGP elements")
56
- ```
57
-
58
- or in the terminal:
59
-
60
- ```sh
61
- pybgpkitstream --start-time 2010-09-01T00:00:00 --end-time 2010-09-01T01:59:00 --collectors route-views.sydney route-views.wide --data-types updates > updates.txt
62
- ```
63
-
64
- ## Motivation
65
-
66
- PyBGPStream is great but the implementation is complex and stops working when UC San Diego experiences a power outage.
67
- BGPKIT broker and parser are great, but cannot be used to create an ordered stream of BGP messages from multiple collectors and multiple data types.
68
-
69
- ## Missing features
70
-
71
- - live mode
72
- - `pybgpkitstream.BGPElement` is not fully compatible with `pybgpstream.BGPElem`: missing record_type (BGPKIT limitation), project (BGPKIT limitation), router (could be improved), router_ip (could be improved)
73
- - CLI output is not yet compatible with `bgpdump -m` or `bgpreader` (right now a similar-looking output is produced)
74
-
75
- ## Issues
76
-
77
- - Program will crash when working with many update files per collector (~ more than few hours of data), only when caching is disabled. This might be caused by [BGPKIT parser not being lazy](https://github.com/bgpkit/bgpkit-parser/pull/239). See [details and workaround fix](examples/many_updates.ipynb)
78
- - Filters are designed with BGPKIT in mind, and can slightly differ to pybgpstream. See [this file](tests/pybgpstream_utils.py) for a conversion to PyBGPStream filter. Note that for now the filters have not been heavily tested...
79
- - ... just like the rest of the project. Use at your own risk. The only tests I did are in /tests
@@ -1,12 +0,0 @@
1
- pybgpkitstream/__init__.py,sha256=OGWVhZdSvialNkIkQ1VBrmiyOcwkCA1D5IaLo7WQnPI,209
2
- pybgpkitstream/bgpelement.py,sha256=7mXSUmWThhIbKy2JVsLchoteve0BshT3uH8cdbAe0Go,1176
3
- pybgpkitstream/bgpkitstream.py,sha256=CKQv7dU-ooznuD1AjHKnZ6qRdPH1ZiOIEGtVNtU8PCY,15062
4
- pybgpkitstream/bgpparser.py,sha256=aJcVCv_ydy3xQcH_BBxQE4hc7G1rLYqqNJAXCdnrasA,14689
5
- pybgpkitstream/bgpstreamconfig.py,sha256=vIfEN475WDIZ7kGmi3dnj_1GIQE_r6qkDFETZmMvH5E,6199
6
- pybgpkitstream/cli.py,sha256=E0E1hO0fzGhy1skBopRufdewsiSy_mA-J8Gf2WxBRxo,4214
7
- pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- pybgpkitstream/utils.py,sha256=6FwEEpBtY_20BDlJPOPFmTYQGqw7fCBLjXmnd7gjBdQ,404
9
- pybgpkitstream-0.2.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
10
- pybgpkitstream-0.2.0.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
11
- pybgpkitstream-0.2.0.dist-info/METADATA,sha256=EBkydfiRKhgjAz4NotQqix2wX9uSHLK3iDQ_1oUSEqU,2953
12
- pybgpkitstream-0.2.0.dist-info/RECORD,,