p2pstore 0.1.7.dev1__tar.gz → 0.1.7.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {p2pstore-0.1.7.dev1/src/p2pstore.egg-info → p2pstore-0.1.7.dev2}/PKG-INFO +2 -2
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/pyproject.toml +2 -2
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/metadata/__init__.py +2 -3
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/metadata/etcd_client.py +58 -15
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/config.py +8 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/logger.py +2 -1
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2/src/p2pstore.egg-info}/PKG-INFO +2 -2
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/LICENSE +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/MANIFEST.in +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/README.md +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/setup.cfg +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/__init__.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/client.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/__init__.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/buffer_registry.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/metadata_client.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/metadata_server.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/object_types.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/transfer_request.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/core/transport.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/transport/__init__.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/transport/engine_adapter.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/transport/rdma_transport.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/transport/tcp_transport.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/transport/transfer_engine.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/__init__.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/common.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/serialization.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore/utils/topology.py +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore.egg-info/SOURCES.txt +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore.egg-info/dependency_links.txt +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore.egg-info/requires.txt +0 -0
- {p2pstore-0.1.7.dev1 → p2pstore-0.1.7.dev2}/src/p2pstore.egg-info/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: p2pstore
|
|
3
|
-
Version: 0.1.7.dev1
|
|
4
|
-
Summary: P2P Store - 基于 RDMA
|
|
3
|
+
Version: 0.1.7.dev2
|
|
4
|
+
Summary: P2P Store - 基于 RDMA 的高性能分布式数据传输系统,更改 ttl 设置
|
|
5
5
|
Author: Paddle
|
|
6
6
|
License-Expression: Apache-2.0
|
|
7
7
|
Keywords: p2p,rdma,distributed,tensor,transfer
|
|
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "p2pstore"
|
|
7
|
-
version = "0.1.7.dev1"
|
|
8
|
-
description = "P2P Store - 基于 RDMA
|
|
7
|
+
version = "0.1.7.dev2"
|
|
8
|
+
description = "P2P Store - 基于 RDMA 的高性能分布式数据传输系统,更改 ttl 设置"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
11
11
|
requires-python = ">=3.10"
|
|
@@ -22,9 +22,7 @@ def create_metadata_client(
|
|
|
22
22
|
metadata_type = (config.metadata_type or "etcd").lower()
|
|
23
23
|
|
|
24
24
|
if metadata_type != "etcd":
|
|
25
|
-
raise ValueError(
|
|
26
|
-
f"不支持的元数据类型: {metadata_type}。当前版本仅支持 Etcd。"
|
|
27
|
-
)
|
|
25
|
+
raise ValueError(f"不支持的元数据类型: {metadata_type}。当前版本仅支持 Etcd。")
|
|
28
26
|
|
|
29
27
|
if EtcdMetadataClient is None:
|
|
30
28
|
raise ImportError("请先安装 etcd3 库: pip install etcd3")
|
|
@@ -38,6 +36,7 @@ def create_metadata_client(
|
|
|
38
36
|
client_id=client_id,
|
|
39
37
|
registered_keys=registered_keys,
|
|
40
38
|
enable_watch=config.enable_watch,
|
|
39
|
+
ttl=config.etcd_lease_ttl,
|
|
41
40
|
)
|
|
42
41
|
|
|
43
42
|
|
|
@@ -9,6 +9,7 @@ Etcd Metadata Client 实现模块.
|
|
|
9
9
|
|
|
10
10
|
import json
|
|
11
11
|
import os
|
|
12
|
+
import random
|
|
12
13
|
import threading
|
|
13
14
|
import time
|
|
14
15
|
from urllib.parse import urlparse
|
|
@@ -38,6 +39,7 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
38
39
|
client_id: str,
|
|
39
40
|
registered_keys: dict[str, int] | None = None,
|
|
40
41
|
enable_watch: bool = True,
|
|
42
|
+
ttl: int = 3600,
|
|
41
43
|
):
|
|
42
44
|
self.logger = LoggerManager.get_logger("etcd-metadata-client")
|
|
43
45
|
self.local_ip = local_ip
|
|
@@ -49,6 +51,7 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
49
51
|
self._registered_keys = registered_keys if registered_keys is not None else {}
|
|
50
52
|
self.etcd_endpoints = etcd_endpoints
|
|
51
53
|
self.enable_watch = enable_watch # 保存配置
|
|
54
|
+
self.ttl = ttl # 租约 TTL
|
|
52
55
|
|
|
53
56
|
# 连接 Etcd (目前简单取第一个地址,生产环境可做高可用轮询)
|
|
54
57
|
self._client = self._connect_etcd()
|
|
@@ -89,11 +92,11 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
89
92
|
|
|
90
93
|
# 增加 gRPC 消息大小限制,解决大量文件时 list_files 超限问题
|
|
91
94
|
# grpc_message:"CLIENT: Received message larger than max (42587779 vs. 4194304)
|
|
92
|
-
# 默认 4MB ->
|
|
95
|
+
# 默认 4MB -> 1GB (与服务端 --max-request-bytes 保持一致)
|
|
93
96
|
# 注意:过大会增加内存占用和传输时间,建议根据实际需求调整
|
|
94
97
|
grpc_options = [
|
|
95
|
-
("grpc.max_send_message_length",
|
|
96
|
-
("grpc.max_receive_message_length",
|
|
98
|
+
("grpc.max_send_message_length", 1024 * 1024 * 1024),
|
|
99
|
+
("grpc.max_receive_message_length", 1024 * 1024 * 1024),
|
|
97
100
|
# 添加 gRPC 超时,防止网络分区时永久阻塞
|
|
98
101
|
("grpc.keepalive_time_ms", 10000), # 10秒发送keepalive
|
|
99
102
|
("grpc.keepalive_timeout_ms", 5000), # 5秒keepalive超时
|
|
@@ -297,7 +300,7 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
297
300
|
return False
|
|
298
301
|
|
|
299
302
|
# 重新创建 Lease
|
|
300
|
-
self.lease = self._client.lease(ttl=
|
|
303
|
+
self.lease = self._client.lease(ttl=self.ttl)
|
|
301
304
|
self.lease_id = getattr(self.lease, "id", None) # 防御性获取
|
|
302
305
|
|
|
303
306
|
if not self.lease_id:
|
|
@@ -335,8 +338,8 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
335
338
|
# 保存 host 地址,供 close() 使用
|
|
336
339
|
self.local_host = host
|
|
337
340
|
|
|
338
|
-
#
|
|
339
|
-
self.lease = self._client.lease(ttl=
|
|
341
|
+
# 创建租约
|
|
342
|
+
self.lease = self._client.lease(ttl=self.ttl)
|
|
340
343
|
# self.lease_id = self.lease.id
|
|
341
344
|
self.lease_id = getattr(self.lease, "id", None) # 统一使用防御性获取
|
|
342
345
|
if not self.lease_id:
|
|
@@ -347,7 +350,12 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
347
350
|
|
|
348
351
|
# 写入并绑定租约(全局timeout已在client初始化时设置)
|
|
349
352
|
self._client.put(key, value, lease=self.lease)
|
|
350
|
-
self.logger.info(
|
|
353
|
+
self.logger.info(
|
|
354
|
+
"Provider 注册成功: host=%s, Lease ID=0x%x, TTL=%ds",
|
|
355
|
+
host,
|
|
356
|
+
self.lease_id,
|
|
357
|
+
self.ttl,
|
|
358
|
+
)
|
|
351
359
|
|
|
352
360
|
# 启动自动续租线程 (etcd3 库并未内置自动后台刷新,需要手动或使用 refresh)
|
|
353
361
|
# 实际上 python-etcd3 的 lease 对象没有自动 refresh thread,这里简单实现一个
|
|
@@ -381,7 +389,27 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
381
389
|
try:
|
|
382
390
|
self.lease.refresh()
|
|
383
391
|
consecutive_failures = 0 # 成功后重置失败计数
|
|
384
|
-
|
|
392
|
+
|
|
393
|
+
# [新增日志] 打印续租成功状态
|
|
394
|
+
# 如果 TTL 较长(>60s) 或 调试模式,打印 INFO 日志,让用户安心
|
|
395
|
+
# 对于短 TTL,避免日志刷屏,仅在 DEBUG 级别打印
|
|
396
|
+
if self.ttl > 60:
|
|
397
|
+
self.logger.info(
|
|
398
|
+
"[ETCD-HEARTBEAT] 续租成功: lease_id=0x%x, ttl=%ds, host=%s, status=Active",
|
|
399
|
+
self.lease_id or 0,
|
|
400
|
+
self.ttl,
|
|
401
|
+
self.local_host,
|
|
402
|
+
)
|
|
403
|
+
else:
|
|
404
|
+
self.logger.debug(
|
|
405
|
+
"[ETCD-HEARTBEAT] 续租成功: lease_id=0x%x", self.lease_id
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
# 动态调整刷新间隔,保证在 TTL 过期前至少刷新 3 次
|
|
409
|
+
base_interval = max(1, self.ttl / 3)
|
|
410
|
+
# 添加 10% 的随机抖动,防止太多 client 同时发起请求 (Thundering Herd)
|
|
411
|
+
jitter = base_interval * 0.1 * (random.random() * 2 - 1)
|
|
412
|
+
time.sleep(base_interval + jitter)
|
|
385
413
|
|
|
386
414
|
except Exception as e:
|
|
387
415
|
consecutive_failures += 1
|
|
@@ -491,7 +519,8 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
491
519
|
key = f"{self.PREFIX_FILE}{file_key}"
|
|
492
520
|
|
|
493
521
|
self.logger.debug(
|
|
494
|
-
"[ETCD] 准备删除文件: key=%s",
|
|
522
|
+
"[ETCD] 准备删除文件: client_id=%s, key=%s",
|
|
523
|
+
self.client_id,
|
|
495
524
|
file_key,
|
|
496
525
|
)
|
|
497
526
|
|
|
@@ -500,17 +529,25 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
500
529
|
success = self._client.delete(key)
|
|
501
530
|
if success:
|
|
502
531
|
self.logger.info(
|
|
503
|
-
"[ETCD] Etcd 删除成功: key=%s, 等待 Watch 广播 DeleteEvent",
|
|
532
|
+
"[ETCD] Etcd 删除成功: client_id=%s, key=%s, 等待 Watch 广播 DeleteEvent",
|
|
533
|
+
self.client_id,
|
|
504
534
|
file_key,
|
|
505
535
|
)
|
|
506
536
|
return True
|
|
507
537
|
else:
|
|
508
538
|
self.logger.warning(
|
|
509
|
-
"[ETCD] Etcd 删除返回 False: key=%s (可能已不存在)",
|
|
539
|
+
"[ETCD] Etcd 删除返回 False: client_id=%s, key=%s (可能已不存在)",
|
|
540
|
+
self.client_id,
|
|
541
|
+
file_key,
|
|
510
542
|
)
|
|
511
543
|
return False
|
|
512
544
|
except Exception as e:
|
|
513
|
-
self.logger.error(
|
|
545
|
+
self.logger.error(
|
|
546
|
+
"[ETCD] 删除文件异常: client_id=%s, key=%s, error=%s",
|
|
547
|
+
self.client_id,
|
|
548
|
+
file_key,
|
|
549
|
+
e,
|
|
550
|
+
)
|
|
514
551
|
return False
|
|
515
552
|
|
|
516
553
|
def delete_prefix(self, prefix: str) -> bool:
|
|
@@ -544,7 +581,8 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
544
581
|
last_deleted = getattr(response, "deleted", None)
|
|
545
582
|
elapsed = time.perf_counter() - start
|
|
546
583
|
self.logger.info(
|
|
547
|
-
"[ETCD] delete_prefix 返回: prefix=%s, attempt=%d/%d, deleted=%s, elapsed=%.3fs",
|
|
584
|
+
"[ETCD] delete_prefix 返回: client_id=%s, prefix=%s, attempt=%d/%d, deleted=%s, elapsed=%.3fs",
|
|
585
|
+
self.client_id,
|
|
548
586
|
prefix,
|
|
549
587
|
attempt,
|
|
550
588
|
max_attempts,
|
|
@@ -554,7 +592,8 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
554
592
|
except Exception as e:
|
|
555
593
|
last_error = e
|
|
556
594
|
self.logger.warning(
|
|
557
|
-
"[ETCD] delete_prefix 异常: prefix=%s, attempt=%d/%d, error=%s",
|
|
595
|
+
"[ETCD] delete_prefix 异常: client_id=%s, prefix=%s, attempt=%d/%d, error=%s",
|
|
596
|
+
self.client_id,
|
|
558
597
|
prefix,
|
|
559
598
|
attempt,
|
|
560
599
|
max_attempts,
|
|
@@ -704,7 +743,11 @@ class EtcdMetadataClient(MetadataClient):
|
|
|
704
743
|
keys_to_clear = list(files.keys())
|
|
705
744
|
count = len(keys_to_clear)
|
|
706
745
|
|
|
707
|
-
self.logger.info(
|
|
746
|
+
self.logger.info(
|
|
747
|
+
"[ETCD] 开始清空文件: client_id=%s, total=%d (查询自 Etcd)",
|
|
748
|
+
self.client_id,
|
|
749
|
+
count,
|
|
750
|
+
)
|
|
708
751
|
|
|
709
752
|
# 逐个删除以触发 Watch DeleteEvent
|
|
710
753
|
failed_keys = []
|
|
@@ -48,9 +48,17 @@ class P2PConfig:
|
|
|
48
48
|
retry_interval: int = 5
|
|
49
49
|
log_name: str | None = None
|
|
50
50
|
enable_watch: bool = True # 是否启用 Etcd Watch (Consumer get 节点设为 False)
|
|
51
|
+
etcd_lease_ttl: int = 3600 # Etcd 租约 TTL (秒),默认 1小时
|
|
51
52
|
|
|
52
53
|
def __post_init__(self) -> None:
|
|
53
54
|
"""初始化后处理."""
|
|
55
|
+
# 优先使用环境变量中的 TTL 设置
|
|
56
|
+
if os.getenv("P2P_ETCD_LEASE_TTL"):
|
|
57
|
+
try:
|
|
58
|
+
self.etcd_lease_ttl = int(os.environ["P2P_ETCD_LEASE_TTL"])
|
|
59
|
+
except ValueError:
|
|
60
|
+
pass
|
|
61
|
+
|
|
54
62
|
# 自动填充 local_host
|
|
55
63
|
if not self.local_host:
|
|
56
64
|
self.local_host = os.getenv("POD_IP", "").strip() or "127.0.0.1"
|
|
@@ -206,7 +206,8 @@ class LoggerManager:
|
|
|
206
206
|
else:
|
|
207
207
|
fmt = f"%(asctime)s - {client_prefix}%(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
|
|
208
208
|
|
|
209
|
-
|
|
209
|
+
# 不指定 datefmt,默认格式为 "%Y-%m-%d %H:%M:%S,333" (包含毫秒)
|
|
210
|
+
formatter = logging.Formatter(fmt)
|
|
210
211
|
|
|
211
212
|
log_file = cls._get_log_dir() / f"{name}.log"
|
|
212
213
|
print(
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: p2pstore
|
|
3
|
-
Version: 0.1.7.dev1
|
|
4
|
-
Summary: P2P Store - 基于 RDMA
|
|
3
|
+
Version: 0.1.7.dev2
|
|
4
|
+
Summary: P2P Store - 基于 RDMA 的高性能分布式数据传输系统,更改 ttl 设置
|
|
5
5
|
Author: Paddle
|
|
6
6
|
License-Expression: Apache-2.0
|
|
7
7
|
Keywords: p2p,rdma,distributed,tensor,transfer
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|