hello-datap-component-base 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ """
2
+ 数据处理平台组件基类 - 统一的服务管理框架
3
+
4
+ 提供标准化的数据处理组件开发框架,统一用户代码的入参和出参以及程序执行入口。
5
+ """
6
+
7
+ from .base import BaseService, ServiceConfig
8
+ from .runner import ServiceRunner
9
+ from .config import ServerConfig, RuntimeEnv
10
+ from .logger import setup_logging, get_service_logger
11
+ from .discover import find_service_classes, get_single_service_class
12
+
13
+ # 导入 logger 实例
14
+ from .logger import logger
15
+
16
# Package metadata. The version string must match the released distribution
# (this wheel is published as 0.2.0; the previous value "0.1.9" was stale).
__version__ = "0.2.0"
__author__ = "zhaohaidong"
__email__ = "zhaohaidong389@hellobike.com"

# Names re-exported as the package's public API.
__all__ = [
    "BaseService",
    "ServiceConfig",
    "ServerConfig",
    "RuntimeEnv",
    "ServiceRunner",
    "setup_logging",
    "get_service_logger",
    "logger",
    "find_service_classes",
    "get_single_service_class",
]
@@ -0,0 +1,211 @@
1
+ import asyncio
2
+ import time
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Dict, Optional
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
+
8
class ServiceConfig(BaseModel):
    """Base configuration model passed to a service instance."""
    # Service name (required).
    name: str
    # Optional service version.
    version: Optional[str] = None
    # Free-form service parameters; exposed through ``BaseService.params``.
    params: Dict[str, Any] = {}
    # Optional runtime environment mapping; BaseService reads its "env_vars" entry.
    runtime_env: Optional[Dict[str, Any]] = None
    # Identifiers that BaseService echoes back in the formatted result.
    work_flow_id: Optional[int] = None
    work_flow_instance_id: Optional[int] = None
    task_id: Optional[str] = None

    # Unknown keys in the input are ignored instead of raising.
    model_config = ConfigDict(extra="ignore")
19
+
20
+
21
class BaseService(ABC):
    """Abstract base class that every user service must subclass.

    Subclasses implement :meth:`process`. :meth:`handle_request` wraps it with
    the optional ``pre_process`` / ``post_process`` hooks, timing, logging and
    a uniform ``{"code", "message", "data"}`` result envelope.
    """

    _config: Optional["ServiceConfig"] = None
    _logger = None

    def __init__(self, config: "ServiceConfig"):
        """Store the config, then set up the logger and the runtime environment."""
        self._config = config
        self._setup_logger()
        self._setup_runtime_env()
        # The service context (name/version) was registered in _setup_logger().
        self.logger.info("Service initialized")

    def _setup_logger(self):
        """Register the service context and create the per-service logger."""
        from .logger import get_service_logger, set_service_context
        # Register name/version globally so the package-level logger can use them.
        set_service_context(self._config.name, self._config.version)
        self._logger = get_service_logger(self._config.name, self._config.version)

    def _setup_runtime_env(self):
        """Export ``runtime_env["env_vars"]`` (if any) into ``os.environ``."""
        runtime_env = self._config.runtime_env
        if not runtime_env:
            return
        import os
        env_vars = runtime_env.get("env_vars")
        if env_vars and isinstance(env_vars, dict):
            for key, value in env_vars.items():
                # Environment values must be strings.
                os.environ[key] = str(value)

    @property
    def config(self) -> "ServiceConfig":
        """Return the service config.

        Raises:
            ValueError: if the service has no config attached.
        """
        if self._config is None:
            raise ValueError("Service not initialized with config")
        return self._config

    @property
    def logger(self):
        """Return the service logger, creating it on first access."""
        if self._logger is None:
            self._setup_logger()
        return self._logger

    @property
    def params(self) -> Dict[str, Any]:
        """Return the ``params`` mapping of the service config."""
        return self.config.params

    @abstractmethod
    async def process(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Business logic of the service; subclasses must implement it.

        Args:
            data: input data (the output of ``pre_process``)

        Returns:
            the processing result
        """
        pass

    async def pre_process(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Pre-processing hook; subclasses may override it.

        Args:
            data: raw input data

        Returns:
            the data handed to ``process`` (unchanged by default)
        """
        return data

    async def post_process(self, data: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
        """
        Post-processing hook; subclasses may override it.

        Args:
            data: the data that was passed to ``process``
            result: the value returned by ``process``

        Returns:
            the final result (``result`` unchanged by default)
        """
        return result

    def _format_result(
        self,
        code: int,
        message: str,
        output: Optional[Dict[str, Any]] = None,
        processing_time: Optional[float] = None
    ) -> Dict[str, Any]:
        """
        Wrap an outcome in the standard result envelope.

        Args:
            code: return code, 0 for success and non-zero for failure
            message: return message
            output: the user program's output
            processing_time: elapsed time in seconds; omitted from the
                envelope when None

        Returns:
            the formatted result dictionary
        """
        result = {
            "code": code,
            "message": message,
            "data": {
                "work_flow_id": self._config.work_flow_id,
                "work_flow_instance_id": self._config.work_flow_instance_id,
                "task_id": self._config.task_id,
                "out_put": output
            }
        }
        if processing_time is not None:
            result["processing_time"] = processing_time
        return result

    @staticmethod
    def _to_log_json(payload: Any) -> str:
        """Render *payload* for a log line without ever raising.

        JSON-serializable payloads are rendered exactly as plain
        ``json.dumps(..., ensure_ascii=False)`` would. Other objects are
        stringified on purpose (``default=str``), and anything JSON still
        rejects (e.g. non-string keys, circular references) falls back to
        ``repr``. Logging must never turn a successful request into a failure.
        """
        import json

        try:
            return json.dumps(payload, ensure_ascii=False, default=str)
        except (TypeError, ValueError):
            return repr(payload)

    async def handle_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run the full request pipeline: pre_process -> process -> post_process.

        Exceptions raised by the pipeline are not propagated; they are logged
        and reported through a ``code=-1`` envelope.

        Args:
            data: input data

        Returns:
            the formatted result envelope (see ``_format_result``)
        """
        # perf_counter() is monotonic, so the measured duration cannot go
        # negative when the wall clock is adjusted.
        start_time = time.perf_counter()

        try:
            self.logger.info(
                f"Processing request - Input: {self._to_log_json(data)}",
                extra={"input_data": data}
            )

            processed_data = await self.pre_process(data)
            result = await self.process(processed_data)
            final_result = await self.post_process(processed_data, result)

            processing_time = time.perf_counter() - start_time

            self.logger.info(
                f"Request processed successfully - Result: {self._to_log_json(final_result)}, Processing time: {processing_time:.3f}s",
                extra={"result": final_result, "processing_time": processing_time}
            )

            return self._format_result(
                code=0,
                message="success",
                output=final_result,
                processing_time=processing_time
            )

        except Exception as e:
            # Record the elapsed time even when the request failed.
            processing_time = time.perf_counter() - start_time

            # Some exceptions (e.g. ``KeyError()``) have an empty str(); fall
            # back to the type name so the message is never blank.
            error_msg = str(e) or type(e).__name__
            self.logger.error(
                f"Error processing request: {error_msg}, Processing time: {processing_time:.3f}s",
                extra={"error": error_msg, "error_type": type(e).__name__, "processing_time": processing_time}
            )

            return self._format_result(
                code=-1,
                message=error_msg,
                output=None,
                processing_time=processing_time
            )
@@ -0,0 +1,276 @@
1
+ import click
2
+ import json
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from .base import ServiceConfig
8
+ from .runner import ServiceRunner
9
+ from .discover import find_service_classes, get_single_service_class
10
+ from .config import ServerConfig
11
+
12
+
13
# Root click command group. The docstring below is the user-facing --help
# text and is therefore kept verbatim.
# The reported version must match the released distribution (0.2.0); the
# previous hard-coded "0.1.9" was stale.
@click.group()
@click.version_option(version="0.2.0")
def cli():
    """数据处理平台组件基类 - 统一的服务管理框架"""
    pass
18
+
19
+
20
# `start` command: build a ServiceRunner for the given config (and optional
# service class name) and run it. The docstring is the --help text shown to
# users, so it is kept verbatim.
@cli.command()
@click.argument("config_path")
@click.option("--class-name", "-c", help="指定要使用的服务类名")
def start(config_path: str, class_name: Optional[str] = None):
    """
    启动服务并执行一次处理(支持本地文件路径或HTTP URL)

    输入数据从配置文件的 params.input_data 中获取。
    如果 params.input_data 不存在,将使用默认测试数据。
    """
    ServiceRunner(config_path, class_name).run()
32
+
33
+
34
# `init` command: scaffold an ``example_service`` directory in the current
# working directory containing config.json, service.py and README.md.
# Existing files with those names are overwritten. The docstring is the
# user-facing --help text and is kept verbatim.
@cli.command()
def init():
    """初始化示例项目"""
    # Create the example directory (an existing one is reused).
    example_dir = Path("example_service")
    example_dir.mkdir(exist_ok=True)

    # Example configuration that is written to config.json.
    config = {
        "name": "example-service",
        "version": "1.0.0",
        "runtime_env": {
            "pip": ["requests>=2.25.0"],
            "env_vars": {
                "LOG_LEVEL": "INFO",
                "ENV": "development"
            }
        },
        "params": {
            "example_param": "value"
        }
    }

    config_file = example_dir / "config.json"
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2, ensure_ascii=False)

    # Source code of the example service, written unchanged to service.py.
    example_code = '''#!/usr/bin/env python3
"""
示例服务
"""
import asyncio
from hello_datap_component_base import BaseService


class ExampleService(BaseService):
    """示例服务实现"""

    async def process(self, data: dict) -> dict:
        """处理请求的业务逻辑"""
        self.logger.info(f"收到请求数据: {data}")

        # 模拟处理逻辑
        await asyncio.sleep(0.1)

        # 返回结果
        result = {
            "status": "success",
            "message": f"Hello, {data.get('name', 'World')}!",
            "processed_data": {
                "original": data,
                "extra_info": self.params
            },
            "timestamp": asyncio.get_event_loop().time()
        }

        return result


if __name__ == "__main__":
    # 本地测试
    import json
    from hello_datap_component_base import ServiceConfig

    async def test():
        config = ServiceConfig(
            name="test-example",
            params={"test": "value"}
        )
        service = ExampleService(config)

        # 测试请求
        result = await service.process({"name": "Test User"})
        print(json.dumps(result, indent=2, ensure_ascii=False))

    asyncio.run(test())
'''

    code_file = example_dir / "service.py"
    with open(code_file, "w", encoding="utf-8") as f:
        f.write(example_code)

    # README contents; {example_dir} is interpolated into the usage snippet.
    readme = f"""# 示例服务

这是一个通过 component_manager 创建的服务示例。

## 文件结构
- `config.json` - 服务配置文件
- `service.py` - 服务实现代码

## 启动服务
```bash
component_manager start config.json
```

## 本地测试
```bash
cd {example_dir}
python service.py
```

## 配置说明
配置文件包含以下主要部分:

1. **name**: 服务名称
2. **version**: 服务版本(可选)
3. **runtime_env**: 运行时环境
   - pip: Python 依赖包
   - env_vars: 环境变量
4. **params**: 服务参数
"""

    readme_file = example_dir / "README.md"
    with open(readme_file, "w", encoding="utf-8") as f:
        f.write(readme)

    # Tell the user what was created and how to start the example.
    click.echo(f"✅ 示例项目已创建在: {example_dir}")
    click.echo(f"📁 配置文件: {config_file}")
    click.echo(f"🐍 示例代码: {code_file}")
    click.echo("\n启动服务:")
    click.echo(f" cd {example_dir}")
    click.echo(f" component_manager start config.json")
158
+
159
+
160
# `list` command: discover service classes under the current working
# directory and print them as text or, with --json, as a JSON array.
# Any failure is reported on stderr and the process exits with status 1.
# The docstring is the user-facing --help text and is kept verbatim.
# NOTE(review): the JSON "file" key actually carries ``cls.__module__``
# (a module name), not a file path - confirm with consumers before changing.
@cli.command()
@click.option("--json", "-j", "json_format", is_flag=True, help="JSON 格式输出")
def list(json_format: bool = False):
    """列出可用的服务类"""
    try:
        import os
        # Always search from the current working directory.
        found = find_service_classes(os.getcwd())

        if json_format:
            entries = []
            for module, cls in found:
                entries.append({
                    "module": module,
                    "class": cls.__name__,
                    "file": getattr(cls, "__module__", "unknown"),
                })
            click.echo(json.dumps(entries, indent=2, ensure_ascii=False))
            return

        if not found:
            # Nothing discovered: print the likely causes and some hints.
            hints = (
                "❌ 未找到服务类",
                "\n可能的原因:",
                " 1. 当前目录下没有继承自 BaseService 的服务类",
                " 2. 服务类文件导入失败(可能是缺少依赖包)",
                " 3. 服务类未实现 process 方法",
                "\n提示:",
                " - 检查是否有 example_service.py 等文件",
                " - 如果服务类需要额外的包,请在配置文件的 runtime_env.pip 中指定",
                " - 运行命令时查看上方的警告信息",
            )
            for line in hints:
                click.echo(line)
            return

        click.echo("📋 发现的服务类:")
        for position, (module, cls) in enumerate(found, start=1):
            click.echo(f"{position}. {cls.__name__} (来自 {module})")

    except Exception as exc:
        click.echo(f"❌ 错误: {exc}", err=True)
        sys.exit(1)
200
+
201
+
202
# `validate` command: load the config through ServerConfig.from_file and echo
# it back as JSON; on any error print the reason to stderr and exit with 1.
# The docstring is the user-facing --help text and is kept verbatim.
@cli.command()
@click.argument("config_path")
def validate(config_path: str):
    """验证配置文件(支持本地文件路径或HTTP URL)"""
    try:
        loaded = ServerConfig.from_file(config_path)
        click.echo("✅ 配置文件有效")
        rendered = json.dumps(loaded.to_dict(), indent=2, ensure_ascii=False)
        click.echo(rendered)
    except Exception as exc:
        click.echo(f"❌ 配置文件无效: {exc}", err=True)
        sys.exit(1)
213
+
214
+
215
# `test` command: load the config, discover the single service class under the
# current working directory, and call its ``process`` coroutine once with the
# request data. Data comes from --file, else the DATA argument (parsed as JSON,
# or wrapped as {"data": DATA} when it is not valid JSON), else a default
# payload. Any failure is printed to stderr and the process exits with 1.
# The docstring is the user-facing --help text and is kept verbatim.
@cli.command()
@click.argument("config_path")
@click.argument("data", required=False)
@click.option("--file", "-f", type=click.File("r"), help="从文件读取请求数据")
def test(config_path: str, data: Optional[str] = None, file: Optional[click.File] = None):
    """测试服务(发送测试请求,支持本地文件路径或HTTP URL)"""
    try:
        config = ServerConfig.from_file(config_path)

        # Discover the service class from the current working directory.
        import os
        service_class = get_single_service_class(search_path=os.getcwd())

        # Resolve the request payload: file > inline argument > default.
        if file:
            payload = json.load(file)
        elif data:
            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                payload = {"data": data}
        else:
            payload = {"test": "default"}

        # Flatten the runtime env to a plain dict; an empty dict becomes None.
        env_dict = None
        if config.runtime_env:
            env_dict = config.runtime_env.model_dump(exclude_none=True) or None

        service = service_class(
            ServiceConfig(
                name=config.name + "-test",
                version=config.version,
                params=config.params,
                runtime_env=env_dict
            )
        )

        # Run the service's process() coroutine once.
        import asyncio
        outcome = asyncio.run(service.process(payload))

        click.echo("✅ 测试结果:")
        click.echo(json.dumps(outcome, indent=2, ensure_ascii=False))

    except Exception as exc:
        click.echo(f"❌ 测试失败: {exc}", err=True)
        sys.exit(1)
268
+
269
+
270
def main():
    """Main entry point: invoke the ``cli`` command group."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,169 @@
1
+ import json
2
+ import os
3
+ import ssl
4
+ import base64
5
+ from pathlib import Path
6
+ from typing import Dict, Any, Optional
7
+ from pydantic import BaseModel, Field, ValidationError, ConfigDict
8
+ import yaml
9
+ import urllib.request
10
+ import urllib.parse
11
+
12
+
13
class RuntimeEnv(BaseModel):
    """Runtime environment configuration."""
    # Optional list of pip entries (element type is not constrained here).
    pip: Optional[list] = None
    # Optional conda specification as a free-form dict.
    conda: Optional[dict] = None
    # Optional environment variables, name -> value.
    env_vars: Optional[Dict[str, str]] = None

    # Unknown keys in the input are ignored instead of raising.
    model_config = ConfigDict(extra="ignore")
20
+
21
+
22
class ServerConfig(BaseModel):
    """Server configuration, loadable from a local JSON/YAML file or an HTTP(S) URL."""
    name: str = Field(..., description="服务名称")
    version: Optional[str] = Field(None, description="服务版本")
    runtime_env: Optional[RuntimeEnv] = None
    params: Dict[str, Any] = Field(default_factory=dict)
    work_flow_id: Optional[int] = Field(None, description="工作流ID")
    work_flow_instance_id: Optional[int] = Field(None, description="工作流实例ID")
    task_id: Optional[str] = Field(None, description="任务ID")

    # Unknown keys in the input are ignored instead of raising.
    model_config = ConfigDict(extra="ignore")

    @classmethod
    def from_file(cls, config_path: str) -> "ServerConfig":
        """
        Load the configuration from a local file or an HTTP(S) URL.

        Args:
            config_path: local file path (.json/.yaml/.yml), HTTP(S) URL
                (the response must be JSON), or a base64-encoded HTTP(S) URL

        Returns:
            a ServerConfig instance

        Raises:
            FileNotFoundError: the local file does not exist
            ValueError: unsupported file extension, download failure, invalid
                JSON from the URL, or a document whose top level is not a mapping
        """
        # A base64-encoded URL takes precedence over treating the value as a path.
        decoded_path = cls._decode_base64_url(config_path)
        if decoded_path:
            config_path = decoded_path

        parsed = urllib.parse.urlparse(config_path)
        if parsed.scheme in ('http', 'https'):
            data = cls._load_from_url(config_path, parsed.scheme)
        else:
            data = cls._load_from_path(config_path)

        # An empty YAML file parses to None and a JSON array to a list; fail
        # with a clear message instead of an AttributeError on ``data.get``.
        if not isinstance(data, dict):
            raise ValueError(
                f"Config must be a mapping at the top level, got {type(data).__name__}"
            )

        # A non-empty runtime_env dict becomes a RuntimeEnv; an empty one is
        # normalised to None. Any other value is left for pydantic to validate.
        runtime_env = data.get("runtime_env")
        if isinstance(runtime_env, dict):
            data["runtime_env"] = RuntimeEnv(**runtime_env) if runtime_env else None

        return cls(**data)

    @staticmethod
    def _ssl_failure_message(url: str, error: Any) -> str:
        """Build the guidance shown when TLS verification of *url* fails."""
        return (
            f"SSL certificate verification failed for URL {url}.\n"
            f"Error: {error}\n\n"
            f"Solutions:\n"
            f"1. For internal services, set environment variable: export SKIP_SSL_VERIFY=true\n"
            f"2. Install the CA certificate bundle\n"
            f"3. Use HTTP instead of HTTPS if security is not required"
        )

    @staticmethod
    def _load_from_url(url: str, scheme: str) -> Any:
        """Download *url* (30 s timeout) and parse the body as JSON."""
        from urllib.error import URLError

        # SECURITY: SKIP_SSL_VERIFY disables certificate and hostname checks.
        # It is an explicit opt-in intended for internal services only.
        skip_ssl_verify = os.environ.get('SKIP_SSL_VERIFY', 'false').lower() in ('true', '1', 'yes')
        ssl_context = None
        if scheme == 'https' and skip_ssl_verify:
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

        try:
            # context=None makes urlopen use its default (verifying) behaviour.
            with urllib.request.urlopen(url, timeout=30, context=ssl_context) as response:
                content = response.read().decode('utf-8')
            return json.loads(content)
        except ssl.SSLError as e:
            raise ValueError(ServerConfig._ssl_failure_message(url, e)) from e
        except URLError as e:
            # urlopen wraps handshake failures in URLError, so the SSL error
            # has to be detected through ``reason`` to show the SSL guidance.
            if isinstance(e.reason, ssl.SSLError):
                raise ValueError(ServerConfig._ssl_failure_message(url, e.reason)) from e
            raise ValueError(f"Failed to load config from URL {url}: {e}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in config from URL {url}: {e}") from e

    @staticmethod
    def _load_from_path(config_path: str) -> Any:
        """Parse a local .json / .yaml / .yml file selected by its extension."""
        path = Path(config_path)
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        suffix = path.suffix.lower()
        if suffix == '.json':
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        if suffix in ('.yaml', '.yml'):
            with open(path, 'r', encoding='utf-8') as f:
                return yaml.safe_load(f)
        raise ValueError(f"Unsupported config file format: {path.suffix}")

    @staticmethod
    def _decode_base64_url(config_path: str) -> Optional[str]:
        """
        Try to interpret *config_path* as a base64-encoded HTTP(S) URL.

        Args:
            config_path: a string that may be base64-encoded

        Returns:
            the decoded URL, or None when the input is not base64 or does not
            decode to an http/https URL
        """
        # Too short to be a meaningful encoded URL.
        if len(config_path) < 10:
            return None

        # Plain URLs and obvious filesystem paths are never base64.
        if config_path.startswith(('http://', 'https://', '/', './', '../')):
            return None

        # Only the standard base64 alphabet (plus whitespace) is accepted.
        base64_chars = set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
        if not all(c in base64_chars or c.isspace() for c in config_path):
            return None

        try:
            decoded_bytes = base64.b64decode(config_path.strip(), validate=True)
            decoded_str = decoded_bytes.decode('utf-8')
            parsed = urllib.parse.urlparse(decoded_str)
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses: the input simply was not a base64-encoded URL.
            return None

        # Anything that decodes to a non-URL is treated as a false positive.
        if parsed.scheme in ('http', 'https'):
            return decoded_str
        return None

    def to_dict(self) -> Dict[str, Any]:
        """Return the config as a dict without None values.

        ``runtime_env`` is included only when it has at least one value set.
        """
        data = self.model_dump(exclude_none=True)
        runtime_env_dict = (
            self.runtime_env.model_dump(exclude_none=True) if self.runtime_env else None
        )
        if runtime_env_dict:
            data["runtime_env"] = runtime_env_dict
        else:
            # Drop an empty ``runtime_env: {}`` left behind by model_dump().
            data.pop("runtime_env", None)
        return data