pybgpkitstream 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pybgpkitstream/bgpkitstream.py +90 -42
- pybgpkitstream/bgpstreamconfig.py +7 -1
- {pybgpkitstream-0.1.2.dist-info → pybgpkitstream-0.1.4.dist-info}/METADATA +1 -1
- pybgpkitstream-0.1.4.dist-info/RECORD +10 -0
- {pybgpkitstream-0.1.2.dist-info → pybgpkitstream-0.1.4.dist-info}/WHEEL +1 -1
- pybgpkitstream-0.1.2.dist-info/RECORD +0 -10
- {pybgpkitstream-0.1.2.dist-info → pybgpkitstream-0.1.4.dist-info}/entry_points.txt +0 -0
pybgpkitstream/bgpkitstream.py
CHANGED
@@ -9,6 +9,7 @@ from heapq import merge
 from operator import itemgetter
 import binascii
 import logging
+from tempfile import TemporaryDirectory

 import aiohttp
 import bgpkit
@@ -44,24 +45,39 @@ def crc32(input_str: str):
     return f"{crc:08x}"


+class Directory:
+    """Permanent directory that mimics TemporaryDirectory interface."""
+
+    def __init__(self, path):
+        self.name = str(path)
+
+    def cleanup(self):
+        """No-op cleanup for permanent directories."""
+        pass
+
+
 class BGPKITStream:
     def __init__(
         self,
-        ts_start:
-        ts_end:
+        ts_start: float,
+        ts_end: float,
         collector_id: str,
         data_type: list[Literal["update", "rib"]],
         cache_dir: str | None,
         filters: dict = {},
         max_concurrent_downloads: int = 10,
+        chunk_time: float | None = datetime.timedelta(hours=2).seconds,
     ):
         self.ts_start = ts_start
         self.ts_end = ts_end
         self.collector_id = collector_id
         self.data_type = data_type
-        self.cache_dir =
+        self.cache_dir: Directory | TemporaryDirectory = (
+            Directory(cache_dir) if cache_dir else TemporaryDirectory()
+        )
         self.filters = filters
         self.max_concurrent_downloads = max_concurrent_downloads
+        self.chunk_time = chunk_time

         self.broker = bgpkit.Broker()

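The new Directory class above exists so that a user-supplied cache path and an auto-created temporary directory expose the same .name / .cleanup() surface and can be handled uniformly by the rest of the stream. A minimal stand-alone sketch of that duck typing; the open_cache helper is hypothetical and not part of the package:

# Sketch only: the shared .name / .cleanup() surface used by BGPKITStream.
from tempfile import TemporaryDirectory


class Directory:
    """Permanent directory that mimics TemporaryDirectory interface."""

    def __init__(self, path):
        self.name = str(path)

    def cleanup(self):
        """No-op cleanup for permanent directories."""
        pass


def open_cache(cache_dir: str | None = None):
    # Hypothetical helper mirroring the constructor logic above:
    # a real path becomes a permanent Directory, otherwise a throwaway temp dir.
    return Directory(cache_dir) if cache_dir else TemporaryDirectory()


tmp = open_cache()                         # TemporaryDirectory: removed by cleanup()
print(tmp.name)                            # a filesystem path either way
tmp.cleanup()

persistent = open_cache("./bgpkit-cache")  # example path; survives cleanup()
persistent.cleanup()                       # no-op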
@@ -70,7 +86,6 @@ class BGPKITStream:
         """Generate a cache filename compatible with BGPKIT parser."""

         hash_suffix = crc32(url)
-        print(url)

         if "updates." in url:
             data_type = "updates"
@@ -140,7 +155,7 @@ class BGPKITStream:
        for rc, rc_urls in self.urls[data_type].items():
            for url in rc_urls:
                filename = self._generate_cache_filename(url)
-                filepath = os.path.join(self.cache_dir, filename)
+                filepath = os.path.join(self.cache_dir.name, filename)

                if os.path.exists(filepath):
                    logging.debug(f"{filepath} is a cache hit")
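The change above only follows the cache handle's .name attribute; the filenames themselves still carry a CRC32 suffix from the crc32() helper shown earlier. A rough, hedged illustration of that scheme follows. The crc32 body is a plausible reconstruction from its return statement, and the filename layout and paths are assumptions, since _generate_cache_filename is not fully visible in this diff:

# Sketch only: CRC32-suffixed cache filenames joined onto cache_dir.name.
# The crc32 body is reconstructed; the filename layout and URL are illustrative.
import binascii
import os


def crc32(input_str: str) -> str:
    crc = binascii.crc32(input_str.encode()) & 0xFFFFFFFF
    return f"{crc:08x}"


url = "https://example.org/rrc00/updates.20240101.0000.gz"  # illustrative URL
hash_suffix = crc32(url)
filename = f"updates.20240101.0000.{hash_suffix}.gz"        # assumed layout
filepath = os.path.join("/tmp/bgpkit-cache", filename)      # stands in for cache_dir.name
print(filepath)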
@@ -171,47 +186,79 @@ class BGPKITStream:
         return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)

     def __iter__(self) -> Iterator[BGPElement]:
-
+        # try/finally to cleanup the fetching cache
+        try:
+            # Manager mode: spawn smaller worker streams to balance fetch/parse
+            if self.chunk_time:
+                current = self.ts_start
+
+                while current < self.ts_end:
+                    chunk_end = min(current + self.chunk_time, self.ts_end)
+
+                    logging.info(
+                        f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
+                        f"to {datetime.datetime.fromtimestamp(chunk_end)}"
+                    )
+
+                    worker = type(self)(
+                        ts_start=current,
+                        ts_end=chunk_end
+                        - 1,  # remove one second because BGPKIT include border
+                        collector_id=self.collector_id,
+                        data_type=self.data_type,
+                        cache_dir=None,
+                        filters=self.filters,
+                        max_concurrent_downloads=self.max_concurrent_downloads,
+                        chunk_time=None,  # Worker doesn't chunk itself
+                    )
+
+                    yield from worker
+                    current = chunk_end + 1e-7
+
+                return
+
+            self._set_urls()

-        if self.cache_dir:
             asyncio.run(self._prefetch_data())

-
-
-
+            # One iterator for each data_type * collector combinations
+            # To be merged according to the elements timestamp
+            iterators_to_merge = []

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            for data_type in self.data_type:
+                is_rib = data_type == "rib"
+
+                # Get rib or update files per collector
+                rc_to_paths = self.paths[data_type]
+
+                # Chain rib or update iterators to get one stream per collector / data_type
+                for rc, paths in rc_to_paths.items():
+                    parsers = [
+                        bgpkit.Parser(url=path, filters=self.filters) for path in paths
+                    ]
+
+                    chained_iterator = chain.from_iterable(parsers)
+
+                    # Add metadata lost by bgpkit for compatibility with pubgpstream
+                    iterators_to_merge.append((chained_iterator, is_rib, rc))
+
+            # Make a generator to tag each bgpkit element with metadata
+            # Benefit 1: full compat with pybgpstream
+            # Benefit 2: we give a key easy to access for heapq to merge
+            tagged_iterators = [
+                self._create_tagged_iterator(it, is_rib, rc)
+                for it, is_rib, rc in iterators_to_merge
+            ]
+
+            # Merge and convert to pybgpstream format
+            for timestamp, bgpkit_elem, is_rib, rc in merge(
+                *tagged_iterators, key=itemgetter(0)
+            ):
+                if self.ts_start <= timestamp <= self.ts_end:
+                    yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+
+        finally:
+            self.cache_dir.cleanup()

     @classmethod
     def from_config(cls, config: BGPStreamConfig):
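The rewritten __iter__ above leans on two standard-library pieces: itertools.chain glues the per-file parsers of one collector into a single time-ordered stream, and heapq.merge with an itemgetter(0) key interleaves the per-collector streams by timestamp. A self-contained sketch of that pattern, using fake (timestamp, message) tuples in place of bgpkit elements:

# Sketch only: timestamp-ordered merge of tagged per-collector streams,
# with fake (timestamp, message) tuples instead of bgpkit elements.
from heapq import merge
from itertools import chain
from operator import itemgetter

# Each collector contributes several files; each file is already sorted by timestamp.
rrc00_files = [[(1.0, "announce A")], [(3.0, "withdraw B")]]
route_views2_files = [[(2.0, "announce C"), (4.0, "announce D")]]


def tagged(files, is_rib, collector):
    # Mirrors _create_tagged_iterator: sort key first, metadata alongside.
    stream = chain.from_iterable(files)
    return ((ts, msg, is_rib, collector) for ts, msg in stream)


iterators = [
    tagged(rrc00_files, False, "rrc00"),
    tagged(route_views2_files, False, "route-views2"),
]

for ts, msg, is_rib, collector in merge(*iterators, key=itemgetter(0)):
    print(ts, collector, msg)
# 1.0 rrc00 announce A
# 2.0 route-views2 announce C
# 3.0 rrc00 withdraw B
# 4.0 route-views2 announce D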
@@ -229,4 +276,5 @@ class BGPKITStream:
             max_concurrent_downloads=config.max_concurrent_downloads
             if config.max_concurrent_downloads
             else 10,
+            chunk_time=config.chunk_time.seconds if config.chunk_time else None,
         )
pybgpkitstream/bgpstreamconfig.py
CHANGED
@@ -63,4 +63,10 @@ class BGPStreamConfig(BaseModel):
         description="Specifies the directory for caching downloaded files.",
     )
     filters: FilterOptions | None = Field(default=None, description="Optional filters")
-    max_concurrent_downloads: int | None = Field(
+    max_concurrent_downloads: int | None = Field(
+        default=None, description="Maximum concurrent downloads when caching"
+    )
+    chunk_time: datetime.timedelta | None = Field(
+        default=datetime.timedelta(hours=2),
+        description="Interval for the fetch/parse cycle (avoid long prefetch time)",
+    )
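The new chunk_time field is a datetime.timedelta, and from_config forwards config.chunk_time.seconds to the stream; note that .seconds is the seconds component of a timedelta (7200 for two hours), not total_seconds(). A small sketch of how such a value slices [ts_start, ts_end] into the worker windows built in __iter__; timestamps, dates, and output are illustrative, and the one-second border adjustment for BGPKIT is left out:

# Sketch only: slicing a time range into chunk_time-sized worker windows.
import datetime

chunk_time = datetime.timedelta(hours=2).seconds  # 7200 seconds
ts_start = datetime.datetime(2024, 1, 1, 0, 0).timestamp()
ts_end = datetime.datetime(2024, 1, 1, 5, 0).timestamp()

current = ts_start
while current < ts_end:
    chunk_end = min(current + chunk_time, ts_end)
    start = datetime.datetime.fromtimestamp(current)
    end = datetime.datetime.fromtimestamp(chunk_end)
    print(f"worker window: {start} -> {end}")
    current = chunk_end + 1e-7  # nudge past the border, as in __iter__
# Three windows: 00:00->02:00, 02:00->04:00, 04:00->05:00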
pybgpkitstream-0.1.4.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
+pybgpkitstream/__init__.py,sha256=kNfv6bvDkaGKjlw0pr9LWVqOQtIGmIPk-VG1ZCBuA38,163
+pybgpkitstream/bgpelement.py,sha256=7mXSUmWThhIbKy2JVsLchoteve0BshT3uH8cdbAe0Go,1176
+pybgpkitstream/bgpkitstream.py,sha256=n68cE4IPI1Lir0Zjkd8R0yPAjZg3GbbFZEEH7BbUIB4,10708
+pybgpkitstream/bgpstreamconfig.py,sha256=_PHoNhq8lw4QzNKya-KQFQ24dEbTjTkmefFhx0t6K8Q,2873
+pybgpkitstream/cli.py,sha256=E0E1hO0fzGhy1skBopRufdewsiSy_mA-J8Gf2WxBRxo,4214
+pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pybgpkitstream-0.1.4.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+pybgpkitstream-0.1.4.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
+pybgpkitstream-0.1.4.dist-info/METADATA,sha256=scEKSheKIwDh35g4AGgz5dD_ba1RSKlTEuG-vj5QXBY,2953
+pybgpkitstream-0.1.4.dist-info/RECORD,,
pybgpkitstream-0.1.2.dist-info/RECORD
REMOVED
@@ -1,10 +0,0 @@
-pybgpkitstream/__init__.py,sha256=kNfv6bvDkaGKjlw0pr9LWVqOQtIGmIPk-VG1ZCBuA38,163
-pybgpkitstream/bgpelement.py,sha256=7mXSUmWThhIbKy2JVsLchoteve0BshT3uH8cdbAe0Go,1176
-pybgpkitstream/bgpkitstream.py,sha256=PFtxn-xhKdMtsMBcY0CQ_V56JqHq0r0swxOQGhL-wy0,8839
-pybgpkitstream/bgpstreamconfig.py,sha256=3Kw3imRh6r8d6lblJ5iKEo48qZipEl30frXR0Fts-CY,2672
-pybgpkitstream/cli.py,sha256=E0E1hO0fzGhy1skBopRufdewsiSy_mA-J8Gf2WxBRxo,4214
-pybgpkitstream/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pybgpkitstream-0.1.2.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
-pybgpkitstream-0.1.2.dist-info/entry_points.txt,sha256=aWhImGlXLtRKkfyJHudcbSp5K5As4ZGL8wXZC0y6q4o,60
-pybgpkitstream-0.1.2.dist-info/METADATA,sha256=S4Iq-Q5wTATfdd5zK_B4YS4s1vOzJ716ShS7F3TjalQ,2953
-pybgpkitstream-0.1.2.dist-info/RECORD,,

{pybgpkitstream-0.1.2.dist-info → pybgpkitstream-0.1.4.dist-info}/entry_points.txt
File without changes