pybgpkitstream 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/PKG-INFO +1 -1
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/pyproject.toml +1 -1
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/bgpkitstream.py +85 -69
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/README.md +0 -0
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/__init__.py +0 -0
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/bgpelement.py +0 -0
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/bgpstreamconfig.py +0 -0
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/cli.py +0 -0
- {pybgpkitstream-0.1.3 → pybgpkitstream-0.1.4}/src/pybgpkitstream/py.typed +0 -0
|
@@ -9,6 +9,7 @@ from heapq import merge
|
|
|
9
9
|
from operator import itemgetter
|
|
10
10
|
import binascii
|
|
11
11
|
import logging
|
|
12
|
+
from tempfile import TemporaryDirectory
|
|
12
13
|
|
|
13
14
|
import aiohttp
|
|
14
15
|
import bgpkit
|
|
@@ -44,6 +45,17 @@ def crc32(input_str: str):
|
|
|
44
45
|
return f"{crc:08x}"
|
|
45
46
|
|
|
46
47
|
|
|
48
|
+
class Directory:
|
|
49
|
+
"""Permanent directory that mimics TemporaryDirectory interface."""
|
|
50
|
+
|
|
51
|
+
def __init__(self, path):
|
|
52
|
+
self.name = str(path)
|
|
53
|
+
|
|
54
|
+
def cleanup(self):
|
|
55
|
+
"""No-op cleanup for permanent directories."""
|
|
56
|
+
pass
|
|
57
|
+
|
|
58
|
+
|
|
47
59
|
class BGPKITStream:
|
|
48
60
|
def __init__(
|
|
49
61
|
self,
|
|
@@ -60,7 +72,9 @@ class BGPKITStream:
|
|
|
60
72
|
self.ts_end = ts_end
|
|
61
73
|
self.collector_id = collector_id
|
|
62
74
|
self.data_type = data_type
|
|
63
|
-
self.cache_dir =
|
|
75
|
+
self.cache_dir: Directory | TemporaryDirectory = (
|
|
76
|
+
Directory(cache_dir) if cache_dir else TemporaryDirectory()
|
|
77
|
+
)
|
|
64
78
|
self.filters = filters
|
|
65
79
|
self.max_concurrent_downloads = max_concurrent_downloads
|
|
66
80
|
self.chunk_time = chunk_time
|
|
@@ -72,7 +86,6 @@ class BGPKITStream:
|
|
|
72
86
|
"""Generate a cache filename compatible with BGPKIT parser."""
|
|
73
87
|
|
|
74
88
|
hash_suffix = crc32(url)
|
|
75
|
-
print(url)
|
|
76
89
|
|
|
77
90
|
if "updates." in url:
|
|
78
91
|
data_type = "updates"
|
|
@@ -142,7 +155,7 @@ class BGPKITStream:
|
|
|
142
155
|
for rc, rc_urls in self.urls[data_type].items():
|
|
143
156
|
for url in rc_urls:
|
|
144
157
|
filename = self._generate_cache_filename(url)
|
|
145
|
-
filepath = os.path.join(self.cache_dir, filename)
|
|
158
|
+
filepath = os.path.join(self.cache_dir.name, filename)
|
|
146
159
|
|
|
147
160
|
if os.path.exists(filepath):
|
|
148
161
|
logging.debug(f"{filepath} is a cache hit")
|
|
@@ -173,76 +186,79 @@ class BGPKITStream:
|
|
|
173
186
|
return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)
|
|
174
187
|
|
|
175
188
|
def __iter__(self) -> Iterator[BGPElement]:
|
|
176
|
-
#
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
189
|
+
# try/finally to clean up the fetching cache
|
|
190
|
+
try:
|
|
191
|
+
# Manager mode: spawn smaller worker streams to balance fetch/parse
|
|
192
|
+
if self.chunk_time:
|
|
193
|
+
current = self.ts_start
|
|
194
|
+
|
|
195
|
+
while current < self.ts_end:
|
|
196
|
+
chunk_end = min(current + self.chunk_time, self.ts_end)
|
|
197
|
+
|
|
198
|
+
logging.info(
|
|
199
|
+
f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
|
|
200
|
+
f"to {datetime.datetime.fromtimestamp(chunk_end)}"
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
worker = type(self)(
|
|
204
|
+
ts_start=current,
|
|
205
|
+
ts_end=chunk_end
|
|
206
|
+
- 1, # remove one second because BGPKIT includes the border
|
|
207
|
+
collector_id=self.collector_id,
|
|
208
|
+
data_type=self.data_type,
|
|
209
|
+
cache_dir=None,
|
|
210
|
+
filters=self.filters,
|
|
211
|
+
max_concurrent_downloads=self.max_concurrent_downloads,
|
|
212
|
+
chunk_time=None, # Worker doesn't chunk itself
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
yield from worker
|
|
216
|
+
current = chunk_end + 1e-7
|
|
217
|
+
|
|
218
|
+
return
|
|
219
|
+
|
|
220
|
+
self._set_urls()
|
|
202
221
|
|
|
203
|
-
return
|
|
204
|
-
|
|
205
|
-
self._set_urls()
|
|
206
|
-
|
|
207
|
-
if self.cache_dir:
|
|
208
222
|
asyncio.run(self._prefetch_data())
|
|
209
223
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
224
|
+
# One iterator for each data_type * collector combination
|
|
225
|
+
# To be merged according to the elements timestamp
|
|
226
|
+
iterators_to_merge = []
|
|
213
227
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
228
|
+
for data_type in self.data_type:
|
|
229
|
+
is_rib = data_type == "rib"
|
|
230
|
+
|
|
231
|
+
# Get rib or update files per collector
|
|
232
|
+
rc_to_paths = self.paths[data_type]
|
|
233
|
+
|
|
234
|
+
# Chain rib or update iterators to get one stream per collector / data_type
|
|
235
|
+
for rc, paths in rc_to_paths.items():
|
|
236
|
+
parsers = [
|
|
237
|
+
bgpkit.Parser(url=path, filters=self.filters) for path in paths
|
|
238
|
+
]
|
|
239
|
+
|
|
240
|
+
chained_iterator = chain.from_iterable(parsers)
|
|
241
|
+
|
|
242
|
+
# Add metadata lost by bgpkit for compatibility with pybgpstream
|
|
243
|
+
iterators_to_merge.append((chained_iterator, is_rib, rc))
|
|
244
|
+
|
|
245
|
+
# Make a generator to tag each bgpkit element with metadata
|
|
246
|
+
# Benefit 1: full compat with pybgpstream
|
|
247
|
+
# Benefit 2: we give heapq an easy-to-access key to merge on
|
|
248
|
+
tagged_iterators = [
|
|
249
|
+
self._create_tagged_iterator(it, is_rib, rc)
|
|
250
|
+
for it, is_rib, rc in iterators_to_merge
|
|
251
|
+
]
|
|
252
|
+
|
|
253
|
+
# Merge and convert to pybgpstream format
|
|
254
|
+
for timestamp, bgpkit_elem, is_rib, rc in merge(
|
|
255
|
+
*tagged_iterators, key=itemgetter(0)
|
|
256
|
+
):
|
|
257
|
+
if self.ts_start <= timestamp <= self.ts_end:
|
|
258
|
+
yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
|
|
259
|
+
|
|
260
|
+
finally:
|
|
261
|
+
self.cache_dir.cleanup()
|
|
246
262
|
|
|
247
263
|
@classmethod
|
|
248
264
|
def from_config(cls, config: BGPStreamConfig):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|