pybgpkitstream 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pybgpkitstream
-Version: 0.1.3
+Version: 0.1.4
 Summary: Drop-in replacement for PyBGPStream using BGPKIT
 Author: JustinLoye
 Author-email: JustinLoye <jloye@iij.ad.jp>
@@ -1,6 +1,6 @@
 [project]
 name = "pybgpkitstream"
-version = "0.1.3"
+version = "0.1.4"
 description = "Drop-in replacement for PyBGPStream using BGPKIT"
 readme = "README.md"
 authors = [
@@ -9,6 +9,7 @@ from heapq import merge
 from operator import itemgetter
 import binascii
 import logging
+from tempfile import TemporaryDirectory
 
 import aiohttp
 import bgpkit
@@ -44,6 +45,17 @@ def crc32(input_str: str):
     return f"{crc:08x}"
 
 
+class Directory:
+    """Permanent directory that mimics TemporaryDirectory interface."""
+
+    def __init__(self, path):
+        self.name = str(path)
+
+    def cleanup(self):
+        """No-op cleanup for permanent directories."""
+        pass
+
+
 class BGPKITStream:
     def __init__(
         self,
@@ -60,7 +72,9 @@ class BGPKITStream:
         self.ts_end = ts_end
         self.collector_id = collector_id
         self.data_type = data_type
-        self.cache_dir = cache_dir
+        self.cache_dir: Directory | TemporaryDirectory = (
+            Directory(cache_dir) if cache_dir else TemporaryDirectory()
+        )
         self.filters = filters
         self.max_concurrent_downloads = max_concurrent_downloads
         self.chunk_time = chunk_time
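With this change the cache directory is duck-typed: a caller-supplied path is wrapped in the new Directory class, and when no path is given a TemporaryDirectory is created instead, so downstream code can use .name and .cleanup() without caring which variant it holds. A minimal standalone sketch of that pattern, assuming only what the hunks above show (the resolve_cache_dir helper is hypothetical, for illustration only):

    import os
    from tempfile import TemporaryDirectory


    class Directory:
        """Permanent directory that mimics the TemporaryDirectory interface."""

        def __init__(self, path):
            self.name = str(path)

        def cleanup(self):
            """No-op: a user-supplied cache directory is never deleted."""


    def resolve_cache_dir(cache_dir=None):
        # Hypothetical helper mirroring the constructor logic in the hunk above.
        return Directory(cache_dir) if cache_dir else TemporaryDirectory()


    cache = resolve_cache_dir()                        # no path -> throwaway temp dir
    path = os.path.join(cache.name, "example.bz2")     # .name works for both variants
    cache.cleanup()                                    # deletes the temp dir; no-op for Directory

The design choice is that callers who pass a persistent cache_dir keep their files, while callers who do not still get automatic cleanup through the same two-method interface.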
@@ -72,7 +86,6 @@ class BGPKITStream:
         """Generate a cache filename compatible with BGPKIT parser."""
 
         hash_suffix = crc32(url)
-        print(url)
 
         if "updates." in url:
             data_type = "updates"
@@ -142,7 +155,7 @@ class BGPKITStream:
             for rc, rc_urls in self.urls[data_type].items():
                 for url in rc_urls:
                     filename = self._generate_cache_filename(url)
-                    filepath = os.path.join(self.cache_dir, filename)
+                    filepath = os.path.join(self.cache_dir.name, filename)
 
                     if os.path.exists(filepath):
                         logging.debug(f"{filepath} is a cache hit")
@@ -173,76 +186,79 @@ class BGPKITStream:
         return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)
 
     def __iter__(self) -> Iterator[BGPElement]:
-        # Manager mode: spawn smaller worker streams to balance fetch/parse
-        if self.chunk_time:
-            current = self.ts_start
-
-            while current < self.ts_end:
-                chunk_end = min(current + self.chunk_time, self.ts_end)
-
-                logging.info(
-                    f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
-                    f"to {datetime.datetime.fromtimestamp(chunk_end)}"
-                )
-
-                worker = type(self)(
-                    ts_start=current,
-                    ts_end=chunk_end
-                    - 1,  # remove one second because BGPKIT include border
-                    collector_id=self.collector_id,
-                    data_type=self.data_type,
-                    cache_dir=self.cache_dir,
-                    filters=self.filters,
-                    max_concurrent_downloads=self.max_concurrent_downloads,
-                    chunk_time=None,  # Worker doesn't chunk itself
-                )
-
-                yield from worker
-                current = chunk_end + 1e-7
+        # try/finally to cleanup the fetching cache
+        try:
+            # Manager mode: spawn smaller worker streams to balance fetch/parse
+            if self.chunk_time:
+                current = self.ts_start
+
+                while current < self.ts_end:
+                    chunk_end = min(current + self.chunk_time, self.ts_end)
+
+                    logging.info(
+                        f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
+                        f"to {datetime.datetime.fromtimestamp(chunk_end)}"
+                    )
+
+                    worker = type(self)(
+                        ts_start=current,
+                        ts_end=chunk_end
+                        - 1,  # remove one second because BGPKIT include border
+                        collector_id=self.collector_id,
+                        data_type=self.data_type,
+                        cache_dir=None,
+                        filters=self.filters,
+                        max_concurrent_downloads=self.max_concurrent_downloads,
+                        chunk_time=None,  # Worker doesn't chunk itself
+                    )
+
+                    yield from worker
+                    current = chunk_end + 1e-7
+
+                return
+
+            self._set_urls()
 
-            return
-
-        self._set_urls()
-
-        if self.cache_dir:
             asyncio.run(self._prefetch_data())
 
-        # One iterator for each data_type * collector combinations
-        # To be merged according to the elements timestamp
-        iterators_to_merge = []
+            # One iterator for each data_type * collector combinations
+            # To be merged according to the elements timestamp
+            iterators_to_merge = []
 
-        for data_type in self.data_type:
-            is_rib = data_type == "rib"
-
-            # Get rib or update files per collector
-            if self.cache_dir:
-                rc_to_urls = self.paths[data_type]
-            else:
-                rc_to_urls = self.urls[data_type]
-
-            # Chain rib or update iterators to get one stream per collector / data_type
-            for rc, urls in rc_to_urls.items():
-                parsers = [bgpkit.Parser(url=url, filters=self.filters) for url in urls]
-
-                chained_iterator = chain.from_iterable(parsers)
-
-                # Add metadata lost by bgpkit for compatibility with pubgpstream
-                iterators_to_merge.append((chained_iterator, is_rib, rc))
-
-        # Make a generator to tag each bgpkit element with metadata
-        # Benefit 1: full compat with pybgpstream
-        # Benefit 2: we give a key easy to access for heapq to merge
-        tagged_iterators = [
-            self._create_tagged_iterator(it, is_rib, rc)
-            for it, is_rib, rc in iterators_to_merge
-        ]
-
-        # Merge and convert to pybgpstream format
-        for timestamp, bgpkit_elem, is_rib, rc in merge(
-            *tagged_iterators, key=itemgetter(0)
-        ):
-            if self.ts_start <= timestamp <= self.ts_end:
-                yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+            for data_type in self.data_type:
+                is_rib = data_type == "rib"
+
+                # Get rib or update files per collector
+                rc_to_paths = self.paths[data_type]
+
+                # Chain rib or update iterators to get one stream per collector / data_type
+                for rc, paths in rc_to_paths.items():
+                    parsers = [
+                        bgpkit.Parser(url=path, filters=self.filters) for path in paths
+                    ]
+
+                    chained_iterator = chain.from_iterable(parsers)
+
+                    # Add metadata lost by bgpkit for compatibility with pubgpstream
+                    iterators_to_merge.append((chained_iterator, is_rib, rc))
+
+            # Make a generator to tag each bgpkit element with metadata
+            # Benefit 1: full compat with pybgpstream
+            # Benefit 2: we give a key easy to access for heapq to merge
+            tagged_iterators = [
+                self._create_tagged_iterator(it, is_rib, rc)
+                for it, is_rib, rc in iterators_to_merge
+            ]
+
+            # Merge and convert to pybgpstream format
+            for timestamp, bgpkit_elem, is_rib, rc in merge(
+                *tagged_iterators, key=itemgetter(0)
+            ):
+                if self.ts_start <= timestamp <= self.ts_end:
+                    yield convert_bgpkit_elem(bgpkit_elem, is_rib, rc)
+
+        finally:
+            self.cache_dir.cleanup()
 
     @classmethod
     def from_config(cls, config: BGPStreamConfig):
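For reference, the merging step in the hunk above keeps elements globally ordered by timestamp: each per-collector iterator is tagged so the first tuple field is the element timestamp, and heapq.merge lazily interleaves the already-sorted streams. A small self-contained illustration of that standard-library pattern, with made-up sample data standing in for the tagged iterators:

    from heapq import merge
    from operator import itemgetter

    # Stand-ins for two timestamp-tagged, individually sorted element streams.
    rrc00 = iter([(10.0, "rrc00: announce"), (30.0, "rrc00: withdraw")])
    route_views = iter([(20.0, "route-views: announce"), (40.0, "route-views: announce")])

    # merge() consumes the streams lazily and yields tuples in global timestamp
    # order, which is how __iter__ combines the per-collector iterators.
    for timestamp, elem in merge(rrc00, route_views, key=itemgetter(0)):
        print(timestamp, elem)  # prints 10.0, 20.0, 30.0, 40.0 in order

The new try/finally wrapper guarantees that cache_dir.cleanup() runs when iteration finishes or is abandoned, which removes the temporary download cache while leaving a user-supplied directory untouched.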