pybgpkitstream 0.1.1__tar.gz → 0.1.3__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/PKG-INFO +1 -1
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/pyproject.toml +1 -1
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/bgpkitstream.py +41 -5
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/bgpstreamconfig.py +7 -0
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/README.md +0 -0
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/__init__.py +0 -0
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/bgpelement.py +0 -0
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/cli.py +0 -0
- {pybgpkitstream-0.1.1 → pybgpkitstream-0.1.3}/src/pybgpkitstream/py.typed +0 -0
|
@@ -47,12 +47,14 @@ def crc32(input_str: str):
|
|
|
47
47
|
class BGPKITStream:
|
|
48
48
|
def __init__(
|
|
49
49
|
self,
|
|
50
|
-
ts_start:
|
|
51
|
-
ts_end:
|
|
50
|
+
ts_start: float,
|
|
51
|
+
ts_end: float,
|
|
52
52
|
collector_id: str,
|
|
53
53
|
data_type: list[Literal["update", "rib"]],
|
|
54
54
|
cache_dir: str | None,
|
|
55
55
|
filters: dict = {},
|
|
56
|
+
max_concurrent_downloads: int = 10,
|
|
57
|
+
chunk_time: float | None = datetime.timedelta(hours=2).seconds,
|
|
56
58
|
):
|
|
57
59
|
self.ts_start = ts_start
|
|
58
60
|
self.ts_end = ts_end
|
|
@@ -60,6 +62,8 @@ class BGPKITStream:
|
|
|
60
62
|
self.data_type = data_type
|
|
61
63
|
self.cache_dir = cache_dir
|
|
62
64
|
self.filters = filters
|
|
65
|
+
self.max_concurrent_downloads = max_concurrent_downloads
|
|
66
|
+
self.chunk_time = chunk_time
|
|
63
67
|
|
|
64
68
|
self.broker = bgpkit.Broker()
|
|
65
69
|
|
|
@@ -129,8 +133,7 @@ class BGPKITStream:
|
|
|
129
133
|
self.paths = {"rib": defaultdict(list), "update": defaultdict(list)}
|
|
130
134
|
tasks = []
|
|
131
135
|
|
|
132
|
-
|
|
133
|
-
semaphore = asyncio.Semaphore(CONCURRENT_DOWNLOADS)
|
|
136
|
+
semaphore = asyncio.Semaphore(self.max_concurrent_downloads)
|
|
134
137
|
|
|
135
138
|
conn = aiohttp.TCPConnector()
|
|
136
139
|
async with aiohttp.ClientSession(connector=conn) as session:
|
|
@@ -154,7 +157,7 @@ class BGPKITStream:
|
|
|
154
157
|
|
|
155
158
|
if tasks:
|
|
156
159
|
logging.info(
|
|
157
|
-
f"Starting download of {len(tasks)} files with a concurrency of {CONCURRENT_DOWNLOADS}..."
|
|
160
|
+
f"Starting download of {len(tasks)} files with a concurrency of {self.max_concurrent_downloads}..."
|
|
158
161
|
)
|
|
159
162
|
results = await asyncio.gather(*tasks)
|
|
160
163
|
|
|
@@ -170,6 +173,35 @@ class BGPKITStream:
|
|
|
170
173
|
return ((elem.timestamp, elem, is_rib, collector) for elem in iterator)
|
|
171
174
|
|
|
172
175
|
def __iter__(self) -> Iterator[BGPElement]:
|
|
176
|
+
# Manager mode: spawn smaller worker streams to balance fetch/parse
|
|
177
|
+
if self.chunk_time:
|
|
178
|
+
current = self.ts_start
|
|
179
|
+
|
|
180
|
+
while current < self.ts_end:
|
|
181
|
+
chunk_end = min(current + self.chunk_time, self.ts_end)
|
|
182
|
+
|
|
183
|
+
logging.info(
|
|
184
|
+
f"Processing chunk: {datetime.datetime.fromtimestamp(current)} "
|
|
185
|
+
f"to {datetime.datetime.fromtimestamp(chunk_end)}"
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
worker = type(self)(
|
|
189
|
+
ts_start=current,
|
|
190
|
+
ts_end=chunk_end
|
|
191
|
+
- 1, # remove one second because BGPKIT include border
|
|
192
|
+
collector_id=self.collector_id,
|
|
193
|
+
data_type=self.data_type,
|
|
194
|
+
cache_dir=self.cache_dir,
|
|
195
|
+
filters=self.filters,
|
|
196
|
+
max_concurrent_downloads=self.max_concurrent_downloads,
|
|
197
|
+
chunk_time=None, # Worker doesn't chunk itself
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
yield from worker
|
|
201
|
+
current = chunk_end + 1e-7
|
|
202
|
+
|
|
203
|
+
return
|
|
204
|
+
|
|
173
205
|
self._set_urls()
|
|
174
206
|
|
|
175
207
|
if self.cache_dir:
|
|
@@ -225,4 +257,8 @@ class BGPKITStream:
|
|
|
225
257
|
filters=config.filters.model_dump(exclude_unset=True)
|
|
226
258
|
if config.filters
|
|
227
259
|
else {},
|
|
260
|
+
max_concurrent_downloads=config.max_concurrent_downloads
|
|
261
|
+
if config.max_concurrent_downloads
|
|
262
|
+
else 10,
|
|
263
|
+
chunk_time=config.chunk_time.seconds if config.chunk_time else None,
|
|
228
264
|
)
|
|
@@ -63,3 +63,10 @@ class BGPStreamConfig(BaseModel):
|
|
|
63
63
|
description="Specifies the directory for caching downloaded files.",
|
|
64
64
|
)
|
|
65
65
|
filters: FilterOptions | None = Field(default=None, description="Optional filters")
|
|
66
|
+
max_concurrent_downloads: int | None = Field(
|
|
67
|
+
default=None, description="Maximum concurrent downloads when caching"
|
|
68
|
+
)
|
|
69
|
+
chunk_time: datetime.timedelta | None = Field(
|
|
70
|
+
default=datetime.timedelta(hours=2),
|
|
71
|
+
description="Interval for the fetch/parse cycle (avoid long prefetch time)",
|
|
72
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|