hello-datap-component-base 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/PKG-INFO +1 -1
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/__init__.py +1 -1
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/cli.py +18 -2
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/discover.py +117 -3
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/mns_client.py +52 -1
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/PKG-INFO +1 -1
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/pyproject.toml +1 -1
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/README.md +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/base.py +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/config.py +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/logger.py +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/oss_client.py +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/runner.py +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/SOURCES.txt +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/dependency_links.txt +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/entry_points.txt +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/requires.txt +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/top_level.txt +0 -0
- {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/setup.cfg +0 -0
|
@@ -11,7 +11,7 @@ from .config import ServerConfig
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
@click.group()
|
|
14
|
-
@click.version_option(version="0.
|
|
14
|
+
@click.version_option(version="0.2.4")
|
|
15
15
|
def cli():
|
|
16
16
|
"""数据处理平台组件基类 - 统一的服务管理框架"""
|
|
17
17
|
pass
|
|
@@ -19,13 +19,29 @@ def cli():
|
|
|
19
19
|
|
|
20
20
|
@cli.command()
|
|
21
21
|
@click.argument("config_path")
|
|
22
|
-
@click.option(
|
|
22
|
+
@click.option(
|
|
23
|
+
"--class-name", "-c",
|
|
24
|
+
help="指定要使用的服务类名或完整路径。支持格式: ClassName, module.ClassName, path/to/module.ClassName"
|
|
25
|
+
)
|
|
23
26
|
def start(config_path: str, class_name: Optional[str] = None):
|
|
24
27
|
"""
|
|
25
28
|
启动服务并执行一次处理(支持本地文件路径或HTTP URL)
|
|
26
29
|
|
|
27
30
|
输入数据从配置文件的 params.input_data 中获取。
|
|
28
31
|
如果 params.input_data 不存在,将使用默认测试数据。
|
|
32
|
+
|
|
33
|
+
\b
|
|
34
|
+
--class-name 支持的格式:
|
|
35
|
+
- ClassName 仅类名
|
|
36
|
+
- module.ClassName 模块名.类名
|
|
37
|
+
- path/to/module.ClassName 相对路径/模块名.类名
|
|
38
|
+
- path.to.module.ClassName 点分隔的完整路径
|
|
39
|
+
|
|
40
|
+
\b
|
|
41
|
+
示例:
|
|
42
|
+
component_manager start config.json -c MyService
|
|
43
|
+
component_manager start config.json -c example_service.MyService
|
|
44
|
+
component_manager start config.json -c services/my_service.MyService
|
|
29
45
|
"""
|
|
30
46
|
runner = ServiceRunner(config_path, class_name)
|
|
31
47
|
runner.run()
|
|
@@ -122,6 +122,100 @@ def find_service_classes(
|
|
|
122
122
|
return service_classes
|
|
123
123
|
|
|
124
124
|
|
|
125
|
+
def _load_class_by_path(class_path: str, search_path: str = ".") -> Type[BaseService]:
|
|
126
|
+
"""
|
|
127
|
+
根据相对路径加载服务类
|
|
128
|
+
|
|
129
|
+
支持的格式:
|
|
130
|
+
- "ClassName" - 仅类名,从所有发现的服务中查找
|
|
131
|
+
- "module.ClassName" - 模块名.类名
|
|
132
|
+
- "path/to/module.ClassName" - 相对路径/模块名.类名
|
|
133
|
+
- "path.to.module.ClassName" - 点分隔的完整路径
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
class_path: 类的路径,支持多种格式
|
|
137
|
+
search_path: 搜索的根路径
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
服务类
|
|
141
|
+
|
|
142
|
+
Raises:
|
|
143
|
+
ValueError: 如果找不到指定的类
|
|
144
|
+
"""
|
|
145
|
+
import sys
|
|
146
|
+
import os
|
|
147
|
+
|
|
148
|
+
# 确保搜索路径在 Python 路径中
|
|
149
|
+
search_path_obj = Path(search_path).resolve()
|
|
150
|
+
search_path_str = str(search_path_obj)
|
|
151
|
+
|
|
152
|
+
current_dir = os.getcwd()
|
|
153
|
+
if current_dir not in sys.path:
|
|
154
|
+
sys.path.insert(0, current_dir)
|
|
155
|
+
if search_path_str not in sys.path:
|
|
156
|
+
sys.path.insert(0, search_path_str)
|
|
157
|
+
if '.' not in sys.path:
|
|
158
|
+
sys.path.insert(0, '.')
|
|
159
|
+
|
|
160
|
+
# 解析 class_path
|
|
161
|
+
# 如果不包含 '.' 则认为只是类名
|
|
162
|
+
if '.' not in class_path and '/' not in class_path:
|
|
163
|
+
# 仅类名,返回 None 表示需要从发现的服务中查找
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
# 将路径分隔符统一转换为点分隔
|
|
167
|
+
# 例如: "path/to/module.ClassName" -> "path.to.module.ClassName"
|
|
168
|
+
normalized_path = class_path.replace('/', '.').replace('\\', '.')
|
|
169
|
+
|
|
170
|
+
# 去掉 .py 后缀(如果有)
|
|
171
|
+
if '.py.' in normalized_path:
|
|
172
|
+
normalized_path = normalized_path.replace('.py.', '.')
|
|
173
|
+
elif normalized_path.endswith('.py'):
|
|
174
|
+
normalized_path = normalized_path[:-3]
|
|
175
|
+
|
|
176
|
+
# 分离模块路径和类名
|
|
177
|
+
# 最后一个点之后的是类名
|
|
178
|
+
parts = normalized_path.rsplit('.', 1)
|
|
179
|
+
if len(parts) != 2:
|
|
180
|
+
raise ValueError(
|
|
181
|
+
f"Invalid class path format: '{class_path}'. "
|
|
182
|
+
f"Expected format: 'module.ClassName' or 'path/to/module.ClassName'"
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
module_path, class_name = parts
|
|
186
|
+
|
|
187
|
+
try:
|
|
188
|
+
# 动态导入模块
|
|
189
|
+
module = importlib.import_module(module_path)
|
|
190
|
+
|
|
191
|
+
# 获取类
|
|
192
|
+
if not hasattr(module, class_name):
|
|
193
|
+
raise ValueError(
|
|
194
|
+
f"Class '{class_name}' not found in module '{module_path}'"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
cls = getattr(module, class_name)
|
|
198
|
+
|
|
199
|
+
# 验证是否是 BaseService 的子类
|
|
200
|
+
if not (inspect.isclass(cls) and issubclass(cls, BaseService)):
|
|
201
|
+
raise ValueError(
|
|
202
|
+
f"Class '{class_name}' is not a subclass of BaseService"
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
if inspect.isabstract(cls):
|
|
206
|
+
raise ValueError(
|
|
207
|
+
f"Class '{class_name}' is abstract and cannot be instantiated"
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
return cls
|
|
211
|
+
|
|
212
|
+
except ImportError as e:
|
|
213
|
+
raise ValueError(
|
|
214
|
+
f"Failed to import module '{module_path}': {e}\n"
|
|
215
|
+
f"Make sure the module path is correct and all dependencies are installed."
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
|
|
125
219
|
def get_single_service_class(
|
|
126
220
|
search_path: str = ".",
|
|
127
221
|
class_name: Optional[str] = None
|
|
@@ -131,7 +225,12 @@ def get_single_service_class(
|
|
|
131
225
|
|
|
132
226
|
Args:
|
|
133
227
|
search_path: 搜索路径
|
|
134
|
-
class_name:
|
|
228
|
+
class_name: 指定的类名或完整路径(可选)
|
|
229
|
+
支持的格式:
|
|
230
|
+
- "ClassName" - 仅类名,从所有发现的服务中查找
|
|
231
|
+
- "module.ClassName" - 模块名.类名
|
|
232
|
+
- "path/to/module.ClassName" - 相对路径/模块名.类名
|
|
233
|
+
- "path.to.module.ClassName" - 点分隔的完整路径
|
|
135
234
|
|
|
136
235
|
Returns:
|
|
137
236
|
服务类
|
|
@@ -139,6 +238,13 @@ def get_single_service_class(
|
|
|
139
238
|
Raises:
|
|
140
239
|
ValueError: 如果找到0个或多个服务类
|
|
141
240
|
"""
|
|
241
|
+
# 如果指定了包含路径的类名,尝试直接加载
|
|
242
|
+
if class_name:
|
|
243
|
+
direct_class = _load_class_by_path(class_name, search_path)
|
|
244
|
+
if direct_class is not None:
|
|
245
|
+
return direct_class
|
|
246
|
+
# 如果返回 None,说明只是类名,继续使用发现机制
|
|
247
|
+
|
|
142
248
|
service_classes = find_service_classes(search_path)
|
|
143
249
|
|
|
144
250
|
if not service_classes:
|
|
@@ -166,11 +272,15 @@ def get_single_service_class(
|
|
|
166
272
|
for f in possible_files:
|
|
167
273
|
error_msg += f" - {f}\n"
|
|
168
274
|
error_msg += f"\nTry using --class-name to specify the service class name."
|
|
275
|
+
error_msg += f"\nSupported formats:"
|
|
276
|
+
error_msg += f"\n - --class-name ClassName"
|
|
277
|
+
error_msg += f"\n - --class-name module.ClassName"
|
|
278
|
+
error_msg += f"\n - --class-name path/to/module.ClassName"
|
|
169
279
|
|
|
170
280
|
raise ValueError(error_msg)
|
|
171
281
|
|
|
172
282
|
if class_name:
|
|
173
|
-
#
|
|
283
|
+
# 查找指定类名的服务(此时 class_name 只是类名,不包含路径)
|
|
174
284
|
for module_path, cls in service_classes:
|
|
175
285
|
if cls.__name__ == class_name:
|
|
176
286
|
return cls
|
|
@@ -181,7 +291,11 @@ def get_single_service_class(
|
|
|
181
291
|
class_list = [f"{module}.{cls.__name__}" for module, cls in service_classes]
|
|
182
292
|
raise ValueError(
|
|
183
293
|
f"Multiple service classes found: {class_list}. "
|
|
184
|
-
f"Please specify which one to use with --class-name
|
|
294
|
+
f"Please specify which one to use with --class-name.\n"
|
|
295
|
+
f"Supported formats:\n"
|
|
296
|
+
f" - --class-name ClassName\n"
|
|
297
|
+
f" - --class-name module.ClassName\n"
|
|
298
|
+
f" - --class-name path/to/module.ClassName"
|
|
185
299
|
)
|
|
186
300
|
|
|
187
301
|
return service_classes[0][1]
|
|
@@ -9,6 +9,11 @@ import logging
|
|
|
9
9
|
|
|
10
10
|
logger = logging.getLogger(__name__)
|
|
11
11
|
|
|
12
|
+
# MNS 消息体最大长度(64KB),预留一些空间给 JSON 结构
|
|
13
|
+
MNS_MAX_MESSAGE_SIZE = 64 * 1024
|
|
14
|
+
# 错误消息最大长度(预留空间给其他字段)
|
|
15
|
+
MAX_ERROR_MESSAGE_LENGTH = 8 * 1024 # 8KB
|
|
16
|
+
|
|
12
17
|
|
|
13
18
|
class MNSClient:
|
|
14
19
|
"""阿里云 MNS 消息队列客户端"""
|
|
@@ -145,6 +150,49 @@ class MNSClient:
|
|
|
145
150
|
self._queue.send_message(msg)
|
|
146
151
|
return True
|
|
147
152
|
|
|
153
|
+
def _truncate_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of ``message`` with oversized fields shortened.

    The top-level 'message' string is cut to MAX_ERROR_MESSAGE_LENGTH
    characters and a truncation marker is appended. If the JSON-encoded
    result is still larger than MNS_MAX_MESSAGE_SIZE bytes, the
    data['out_put'] value is replaced by a small placeholder, provided its
    own JSON form is longer than MAX_ERROR_MESSAGE_LENGTH characters.

    Args:
        message: Original message dictionary; it is never modified.

    Returns:
        A deep copy of the message with the truncations applied.
    """
    import copy

    # Work on a deep copy so the caller's data stays untouched.
    result = copy.deepcopy(message)

    # Step 1: shorten the error text if it is a string over the limit.
    error_text = result['message'] if 'message' in result else None
    if isinstance(error_text, str):
        full_len = len(error_text)
        if full_len > MAX_ERROR_MESSAGE_LENGTH:
            marker = f"\n... [truncated, original length: {full_len}]"
            result['message'] = error_text[:MAX_ERROR_MESSAGE_LENGTH] + marker
            logger.warning(
                f"消息的 message 字段过长({full_len} 字符),已截断至 {MAX_ERROR_MESSAGE_LENGTH} 字符"
            )

    # Step 2: nothing more to do when the encoded body already fits.
    encoded = json.dumps(result, ensure_ascii=False).encode('utf-8')
    if len(encoded) <= MNS_MAX_MESSAGE_SIZE:
        return result

    payload = result['data'] if 'data' in result else None
    if not isinstance(payload, dict):
        return result

    output = payload.get('out_put')
    if output is None:
        return result

    # Step 3: replace an oversized out_put with a placeholder.
    serialized_len = len(json.dumps(output, ensure_ascii=False))
    if serialized_len > MAX_ERROR_MESSAGE_LENGTH:
        payload['out_put'] = {
            '_truncated': True,
            '_message': f'Output too large ({serialized_len} chars), truncated'
        }
        logger.warning(
            f"消息的 out_put 字段过大({serialized_len} 字符),已截断"
        )

    return result
|
|
195
|
+
|
|
148
196
|
def send_message(self, message: Dict[str, Any]) -> bool:
|
|
149
197
|
"""
|
|
150
198
|
发送消息到队列(带重试逻辑)
|
|
@@ -161,8 +209,11 @@ class MNSClient:
|
|
|
161
209
|
logger.warning("MNS 队列未初始化,跳过消息发送")
|
|
162
210
|
return False
|
|
163
211
|
|
|
212
|
+
# 截断过长的消息字段
|
|
213
|
+
truncated_message = self._truncate_message(message)
|
|
214
|
+
|
|
164
215
|
# 将消息转换为 JSON 字符串
|
|
165
|
-
message_body = json.dumps(
|
|
216
|
+
message_body = json.dumps(truncated_message, ensure_ascii=False)
|
|
166
217
|
|
|
167
218
|
# 重试逻辑
|
|
168
219
|
last_exception = None
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "hello-datap-component-base"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "A unified server management framework for data processing component"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|