hello-datap-component-base 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/PKG-INFO +1 -1
  2. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/__init__.py +1 -1
  3. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/cli.py +18 -2
  4. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/discover.py +117 -3
  5. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/mns_client.py +52 -1
  6. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/PKG-INFO +1 -1
  7. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/pyproject.toml +1 -1
  8. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/README.md +0 -0
  9. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/base.py +0 -0
  10. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/config.py +0 -0
  11. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/logger.py +0 -0
  12. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/oss_client.py +0 -0
  13. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base/runner.py +0 -0
  14. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/SOURCES.txt +0 -0
  15. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/dependency_links.txt +0 -0
  16. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/entry_points.txt +0 -0
  17. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/requires.txt +0 -0
  18. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/hello_datap_component_base.egg-info/top_level.txt +0 -0
  19. {hello_datap_component_base-0.2.2 → hello_datap_component_base-0.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hello-datap-component-base
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: A unified server management framework for data processing component
5
5
  Author-email: zhaohaidong <zhaohaidong389@hellobike.com>
6
6
  License: MIT
@@ -13,7 +13,7 @@ from .discover import find_service_classes, get_single_service_class
13
13
  # 导入 logger 实例
14
14
  from .logger import logger
15
15
 
16
- __version__ = "0.1.9"
16
+ __version__ = "0.2.3"
17
17
  __author__ = "zhaohaidong"
18
18
  __email__ = "zhaohaidong389@hellobike.com"
19
19
 
@@ -11,7 +11,7 @@ from .config import ServerConfig
11
11
 
12
12
 
13
13
  @click.group()
14
- @click.version_option(version="0.1.9")
14
+ @click.version_option(version="0.2.4")
15
15
  def cli():
16
16
  """数据处理平台组件基类 - 统一的服务管理框架"""
17
17
  pass
@@ -19,13 +19,29 @@ def cli():
19
19
 
20
20
  @cli.command()
21
21
  @click.argument("config_path")
22
- @click.option("--class-name", "-c", help="指定要使用的服务类名")
22
+ @click.option(
23
+ "--class-name", "-c",
24
+ help="指定要使用的服务类名或完整路径。支持格式: ClassName, module.ClassName, path/to/module.ClassName"
25
+ )
23
26
  def start(config_path: str, class_name: Optional[str] = None):
24
27
  """
25
28
  启动服务并执行一次处理(支持本地文件路径或HTTP URL)
26
29
 
27
30
  输入数据从配置文件的 params.input_data 中获取。
28
31
  如果 params.input_data 不存在,将使用默认测试数据。
32
+
33
+ \b
34
+ --class-name 支持的格式:
35
+ - ClassName 仅类名
36
+ - module.ClassName 模块名.类名
37
+ - path/to/module.ClassName 相对路径/模块名.类名
38
+ - path.to.module.ClassName 点分隔的完整路径
39
+
40
+ \b
41
+ 示例:
42
+ component_manager start config.json -c MyService
43
+ component_manager start config.json -c example_service.MyService
44
+ component_manager start config.json -c services/my_service.MyService
29
45
  """
30
46
  runner = ServiceRunner(config_path, class_name)
31
47
  runner.run()
@@ -122,6 +122,100 @@ def find_service_classes(
122
122
  return service_classes
123
123
 
124
124
 
125
+ def _load_class_by_path(class_path: str, search_path: str = ".") -> Type[BaseService]:
126
+ """
127
+ 根据相对路径加载服务类
128
+
129
+ 支持的格式:
130
+ - "ClassName" - 仅类名,从所有发现的服务中查找
131
+ - "module.ClassName" - 模块名.类名
132
+ - "path/to/module.ClassName" - 相对路径/模块名.类名
133
+ - "path.to.module.ClassName" - 点分隔的完整路径
134
+
135
+ Args:
136
+ class_path: 类的路径,支持多种格式
137
+ search_path: 搜索的根路径
138
+
139
+ Returns:
140
+ 服务类
141
+
142
+ Raises:
143
+ ValueError: 如果找不到指定的类
144
+ """
145
+ import sys
146
+ import os
147
+
148
+ # 确保搜索路径在 Python 路径中
149
+ search_path_obj = Path(search_path).resolve()
150
+ search_path_str = str(search_path_obj)
151
+
152
+ current_dir = os.getcwd()
153
+ if current_dir not in sys.path:
154
+ sys.path.insert(0, current_dir)
155
+ if search_path_str not in sys.path:
156
+ sys.path.insert(0, search_path_str)
157
+ if '.' not in sys.path:
158
+ sys.path.insert(0, '.')
159
+
160
+ # 解析 class_path
161
+ # 如果不包含 '.' 则认为只是类名
162
+ if '.' not in class_path and '/' not in class_path:
163
+ # 仅类名,返回 None 表示需要从发现的服务中查找
164
+ return None
165
+
166
+ # 将路径分隔符统一转换为点分隔
167
+ # 例如: "path/to/module.ClassName" -> "path.to.module.ClassName"
168
+ normalized_path = class_path.replace('/', '.').replace('\\', '.')
169
+
170
+ # 去掉 .py 后缀(如果有)
171
+ if '.py.' in normalized_path:
172
+ normalized_path = normalized_path.replace('.py.', '.')
173
+ elif normalized_path.endswith('.py'):
174
+ normalized_path = normalized_path[:-3]
175
+
176
+ # 分离模块路径和类名
177
+ # 最后一个点之后的是类名
178
+ parts = normalized_path.rsplit('.', 1)
179
+ if len(parts) != 2:
180
+ raise ValueError(
181
+ f"Invalid class path format: '{class_path}'. "
182
+ f"Expected format: 'module.ClassName' or 'path/to/module.ClassName'"
183
+ )
184
+
185
+ module_path, class_name = parts
186
+
187
+ try:
188
+ # 动态导入模块
189
+ module = importlib.import_module(module_path)
190
+
191
+ # 获取类
192
+ if not hasattr(module, class_name):
193
+ raise ValueError(
194
+ f"Class '{class_name}' not found in module '{module_path}'"
195
+ )
196
+
197
+ cls = getattr(module, class_name)
198
+
199
+ # 验证是否是 BaseService 的子类
200
+ if not (inspect.isclass(cls) and issubclass(cls, BaseService)):
201
+ raise ValueError(
202
+ f"Class '{class_name}' is not a subclass of BaseService"
203
+ )
204
+
205
+ if inspect.isabstract(cls):
206
+ raise ValueError(
207
+ f"Class '{class_name}' is abstract and cannot be instantiated"
208
+ )
209
+
210
+ return cls
211
+
212
+ except ImportError as e:
213
+ raise ValueError(
214
+ f"Failed to import module '{module_path}': {e}\n"
215
+ f"Make sure the module path is correct and all dependencies are installed."
216
+ )
217
+
218
+
125
219
  def get_single_service_class(
126
220
  search_path: str = ".",
127
221
  class_name: Optional[str] = None
@@ -131,7 +225,12 @@ def get_single_service_class(
131
225
 
132
226
  Args:
133
227
  search_path: 搜索路径
134
- class_name: 指定的类名(可选)
228
+ class_name: 指定的类名或完整路径(可选)
229
+ 支持的格式:
230
+ - "ClassName" - 仅类名,从所有发现的服务中查找
231
+ - "module.ClassName" - 模块名.类名
232
+ - "path/to/module.ClassName" - 相对路径/模块名.类名
233
+ - "path.to.module.ClassName" - 点分隔的完整路径
135
234
 
136
235
  Returns:
137
236
  服务类
@@ -139,6 +238,13 @@ def get_single_service_class(
139
238
  Raises:
140
239
  ValueError: 如果找到0个或多个服务类
141
240
  """
241
+ # 如果指定了包含路径的类名,尝试直接加载
242
+ if class_name:
243
+ direct_class = _load_class_by_path(class_name, search_path)
244
+ if direct_class is not None:
245
+ return direct_class
246
+ # 如果返回 None,说明只是类名,继续使用发现机制
247
+
142
248
  service_classes = find_service_classes(search_path)
143
249
 
144
250
  if not service_classes:
@@ -166,11 +272,15 @@ def get_single_service_class(
166
272
  for f in possible_files:
167
273
  error_msg += f" - {f}\n"
168
274
  error_msg += f"\nTry using --class-name to specify the service class name."
275
+ error_msg += f"\nSupported formats:"
276
+ error_msg += f"\n - --class-name ClassName"
277
+ error_msg += f"\n - --class-name module.ClassName"
278
+ error_msg += f"\n - --class-name path/to/module.ClassName"
169
279
 
170
280
  raise ValueError(error_msg)
171
281
 
172
282
  if class_name:
173
- # 查找指定类名的服务
283
+ # 查找指定类名的服务(此时 class_name 只是类名,不包含路径)
174
284
  for module_path, cls in service_classes:
175
285
  if cls.__name__ == class_name:
176
286
  return cls
@@ -181,7 +291,11 @@ def get_single_service_class(
181
291
  class_list = [f"{module}.{cls.__name__}" for module, cls in service_classes]
182
292
  raise ValueError(
183
293
  f"Multiple service classes found: {class_list}. "
184
- f"Please specify which one to use with --class-name."
294
+ f"Please specify which one to use with --class-name.\n"
295
+ f"Supported formats:\n"
296
+ f" - --class-name ClassName\n"
297
+ f" - --class-name module.ClassName\n"
298
+ f" - --class-name path/to/module.ClassName"
185
299
  )
186
300
 
187
301
  return service_classes[0][1]
@@ -9,6 +9,11 @@ import logging
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
12
+ # MNS 消息体最大长度(64KB),预留一些空间给 JSON 结构
13
+ MNS_MAX_MESSAGE_SIZE = 64 * 1024
14
+ # 错误消息最大长度(预留空间给其他字段)
15
+ MAX_ERROR_MESSAGE_LENGTH = 8 * 1024 # 8KB
16
+
12
17
 
13
18
  class MNSClient:
14
19
  """阿里云 MNS 消息队列客户端"""
@@ -145,6 +150,49 @@ class MNSClient:
145
150
  self._queue.send_message(msg)
146
151
  return True
147
152
 
153
+ def _truncate_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
154
+ """
155
+ 截断消息中过长的字段,确保消息体不超过 MNS 限制
156
+
157
+ Args:
158
+ message: 原始消息字典
159
+
160
+ Returns:
161
+ 截断后的消息字典
162
+ """
163
+ # 深拷贝,避免修改原始数据
164
+ import copy
165
+ truncated = copy.deepcopy(message)
166
+
167
+ # 截断 message 字段(错误信息)
168
+ if 'message' in truncated and isinstance(truncated['message'], str):
169
+ original_len = len(truncated['message'])
170
+ if original_len > MAX_ERROR_MESSAGE_LENGTH:
171
+ truncated['message'] = (
172
+ truncated['message'][:MAX_ERROR_MESSAGE_LENGTH] +
173
+ f"\n... [truncated, original length: {original_len}]"
174
+ )
175
+ logger.warning(
176
+ f"消息的 message 字段过长({original_len} 字符),已截断至 {MAX_ERROR_MESSAGE_LENGTH} 字符"
177
+ )
178
+
179
+ # 检查整体消息大小,如果仍然过大,进一步截断 out_put
180
+ message_body = json.dumps(truncated, ensure_ascii=False)
181
+ if len(message_body.encode('utf-8')) > MNS_MAX_MESSAGE_SIZE:
182
+ if 'data' in truncated and isinstance(truncated['data'], dict):
183
+ if 'out_put' in truncated['data'] and truncated['data']['out_put'] is not None:
184
+ out_put_str = json.dumps(truncated['data']['out_put'], ensure_ascii=False)
185
+ if len(out_put_str) > MAX_ERROR_MESSAGE_LENGTH:
186
+ truncated['data']['out_put'] = {
187
+ '_truncated': True,
188
+ '_message': f'Output too large ({len(out_put_str)} chars), truncated'
189
+ }
190
+ logger.warning(
191
+ f"消息的 out_put 字段过大({len(out_put_str)} 字符),已截断"
192
+ )
193
+
194
+ return truncated
195
+
148
196
  def send_message(self, message: Dict[str, Any]) -> bool:
149
197
  """
150
198
  发送消息到队列(带重试逻辑)
@@ -161,8 +209,11 @@ class MNSClient:
161
209
  logger.warning("MNS 队列未初始化,跳过消息发送")
162
210
  return False
163
211
 
212
+ # 截断过长的消息字段
213
+ truncated_message = self._truncate_message(message)
214
+
164
215
  # 将消息转换为 JSON 字符串
165
- message_body = json.dumps(message, ensure_ascii=False)
216
+ message_body = json.dumps(truncated_message, ensure_ascii=False)
166
217
 
167
218
  # 重试逻辑
168
219
  last_exception = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hello-datap-component-base
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: A unified server management framework for data processing component
5
5
  Author-email: zhaohaidong <zhaohaidong389@hellobike.com>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hello-datap-component-base"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "A unified server management framework for data processing component"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"