pybgpkitstream 0.1.2.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pybgpkitstream
-Version: 0.1.2
+Version: 0.1.3
 Summary: Drop-in replacement for PyBGPStream using BGPKIT
 Author: JustinLoye
 Author-email: JustinLoye <jloye@iij.ad.jp>
@@ -1,6 +1,6 @@
 [project]
 name = "pybgpkitstream"
-version = "0.1.2"
+version = "0.1.3"
 description = "Drop-in replacement for PyBGPStream using BGPKIT"
 readme = "README.md"
 authors = [
@@ -47,13 +47,14 @@ def crc32(input_str: str):
 class BGPKITStream:
     def __init__(
         self,
-        ts_start: datetime.datetime,
-        ts_end: datetime.datetime,
+        ts_start: float,
+        ts_end: float,
         collector_id: str,
         data_type: list[Literal["update", "rib"]],
         cache_dir: str | None,
         filters: dict = {},
         max_concurrent_downloads: int = 10,
+        chunk_time: float | None = datetime.timedelta(hours=2).seconds,
     ):
         self.ts_start = ts_start
         self.ts_end = ts_end
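
With this release the stream is constructed from Unix timestamps (floats) where it previously took datetime objects; callers can convert with datetime's .timestamp(). A minimal sketch of the new call, with a made-up collector and time range (not taken from this diff) and an import path that is assumed rather than shown here:

    import datetime

    from pybgpkitstream import BGPKITStream  # import path assumed

    ts_start = datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc).timestamp()
    ts_end = datetime.datetime(2024, 1, 1, 6, 0, tzinfo=datetime.timezone.utc).timestamp()

    stream = BGPKITStream(
        ts_start=ts_start,
        ts_end=ts_end,
        collector_id="rrc00",   # example collector
        data_type=["update"],
        cache_dir=None,         # no on-disk caching in this sketch
    )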
@@ -62,6 +63,7 @@ class BGPKITStream:
         self.cache_dir = cache_dir
         self.filters = filters
         self.max_concurrent_downloads = max_concurrent_downloads
+        self.chunk_time = chunk_time

         self.broker = bgpkit.Broker()

@@ -171,6 +173,35 @@ class BGPKITStream:
         return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)

     def __iter__(self) -> Iterator[BGPElement]:
+        # Manager mode: spawn smaller worker streams to balance fetch/parse
+        if self.chunk_time:
+            current = self.ts_start
+
+            while current < self.ts_end:
+                chunk_end = min(current + self.chunk_time, self.ts_end)
+
+                logging.info(
+                    f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
+                    f"to {datetime.datetime.fromtimestamp(chunk_end)}"
+                )
+
+                worker = type(self)(
+                    ts_start=current,
+                    ts_end=chunk_end
+                    - 1,  # remove one second because BGPKIT include border
+                    collector_id=self.collector_id,
+                    data_type=self.data_type,
+                    cache_dir=self.cache_dir,
+                    filters=self.filters,
+                    max_concurrent_downloads=self.max_concurrent_downloads,
+                    chunk_time=None,  # Worker doesn't chunk itself
+                )
+
+                yield from worker
+                current = chunk_end + 1e-7
+
+            return
+
         self._set_urls()

         if self.cache_dir:
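
The chunking above only runs when chunk_time is set; it defaults to 7200 seconds (datetime.timedelta(hours=2).seconds), and each chunk is delegated to a worker stream created with chunk_time=None, so one chunk is downloaded and parsed before the next is fetched. A rough sketch of how the worker windows fall for a hypothetical 5-hour request under that default:

    # ts_start = 0, ts_end = 18000 (5 h), chunk_time = 7200
    # worker 1: ts_start = 0        ts_end = 7199   (chunk_end - 1: BGPKIT treats the end bound as inclusive)
    # worker 2: ts_start ≈ 7200     ts_end ≈ 14399  (each chunk starts just past the previous boundary)
    # worker 3: ts_start ≈ 14400    ts_end = 17999  (clamped to the overall ts_end)
    #
    # Passing chunk_time=None skips manager mode and falls through to the
    # original single-stream path below.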
@@ -229,4 +260,5 @@ class BGPKITStream:
             max_concurrent_downloads=config.max_concurrent_downloads
             if config.max_concurrent_downloads
             else 10,
+            chunk_time=config.chunk_time.seconds if config.chunk_time else None,
         )
@@ -63,4 +63,10 @@ class BGPStreamConfig(BaseModel):
         description="Specifies the directory for caching downloaded files.",
     )
     filters: FilterOptions | None = Field(default=None, description="Optional filters")
-    max_concurrent_downloads: int | None = Field(default=None, description="Maximum concurrent downloads when caching")
+    max_concurrent_downloads: int | None = Field(
+        default=None, description="Maximum concurrent downloads when caching"
+    )
+    chunk_time: datetime.timedelta | None = Field(
+        default=datetime.timedelta(hours=2),
+        description="Interval for the fetch/parse cycle (avoid long prefetch time)",
+    )
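
On the configuration side, chunk_time is exposed as an optional timedelta with the same two-hour default, and the config-driven construction shown above converts it to seconds (or passes None when the field is unset). A hedged sketch of overriding it, with the model's other fields omitted since their names are not visible in this diff:

    import datetime

    config = BGPStreamConfig(
        # other fields (collector, time window, cache directory, ...) omitted here
        max_concurrent_downloads=10,
        chunk_time=datetime.timedelta(hours=1),  # or None to disable chunking
    )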