PyPI - hello-datap-component-base - Versions diffs - 0.2.0__py3-none-any.whl - Mend

hello-datap-component-base 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

hello_datap_component_base/__init__.py +31 -0
hello_datap_component_base/base.py +211 -0
hello_datap_component_base/cli.py +276 -0
hello_datap_component_base/config.py +169 -0
hello_datap_component_base/discover.py +187 -0
hello_datap_component_base/logger.py +290 -0
hello_datap_component_base/mns_client.py +286 -0
hello_datap_component_base/runner.py +247 -0
hello_datap_component_base-0.2.0.dist-info/METADATA +596 -0
hello_datap_component_base-0.2.0.dist-info/RECORD +13 -0
hello_datap_component_base-0.2.0.dist-info/WHEEL +5 -0
hello_datap_component_base-0.2.0.dist-info/entry_points.txt +2 -0
hello_datap_component_base-0.2.0.dist-info/top_level.txt +1 -0

hello_datap_component_base/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+"""
+数据处理平台组件基类 - 统一的服务管理框架
+提供标准化的数据处理组件开发框架，统一用户代码的入参和出参以及程序执行入口。
+"""
+from .base import BaseService, ServiceConfig
+from .runner import ServiceRunner
+from .config import ServerConfig, RuntimeEnv
+from .logger import setup_logging, get_service_logger
+from .discover import find_service_classes, get_single_service_class
+# 导入 logger 实例
+from .logger import logger
+# Package metadata. The version must match the released distribution
+# (this wheel is hello_datap_component_base-0.2.0); it was left at 0.1.9.
+__version__ = "0.2.0"
+__author__ = "zhaohaidong"
+__email__ = "zhaohaidong389@hellobike.com"
+# Public API re-exported from the submodules imported above.
+__all__ = [
+    "BaseService",
+    "ServiceConfig",
+    "ServerConfig",
+    "RuntimeEnv",
+    "ServiceRunner",
+    "setup_logging",
+    "get_service_logger",
+    "logger",
+    "find_service_classes",
+    "get_single_service_class",
+]

hello_datap_component_base/base.py ADDED Viewed

@@ -0,0 +1,211 @@
+import asyncio
+import time
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+from pydantic import BaseModel, ConfigDict
+class ServiceConfig(BaseModel):
+    """Base configuration model handed to every service instance."""
+    # Service name; the only required field.
+    name: str
+    # Optional service version string.
+    version: Optional[str] = None
+    # Free-form service parameters, exposed through BaseService.params.
+    params: Dict[str, Any] = {}
+    # Optional runtime environment settings; BaseService reads its "env_vars" key.
+    runtime_env: Optional[Dict[str, Any]] = None
+    # Optional identifiers that BaseService._format_result echoes back in results.
+    work_flow_id: Optional[int] = None
+    work_flow_instance_id: Optional[int] = None
+    task_id: Optional[str] = None
+    # Keys not declared above are ignored instead of rejected.
+    model_config = ConfigDict(extra="ignore")
+class BaseService(ABC):
+    """
+    Abstract base class that every user service must inherit from.
+    Subclasses implement ``process``; ``pre_process`` and ``post_process`` are
+    optional hooks. ``handle_request`` chains the three stages, measures the
+    elapsed time and wraps the outcome in a uniform result dictionary.
+    """
+    _config: Optional[ServiceConfig] = None
+    _logger = None
+    def __init__(self, config: ServiceConfig):
+        """
+        Store the configuration, then set up logging and the runtime environment.
+        Args:
+            config: Configuration for this service instance.
+        """
+        self._config = config
+        self._setup_logger()
+        self._setup_runtime_env()
+        # Per the original author's note, the logger attaches the service
+        # version to this record automatically.
+        self.logger.info("Service initialized")
+    def _setup_logger(self):
+        """Register the service name/version as logging context and create the service logger."""
+        from .logger import get_service_logger, set_service_context
+        # Set the global service context first so that (per the original
+        # comment) the package-level logger also carries the service info.
+        set_service_context(self._config.name, self._config.version)
+        self._logger = get_service_logger(self._config.name, self._config.version)
+    def _setup_runtime_env(self):
+        """Export ``runtime_env["env_vars"]`` into ``os.environ`` when it is a dict."""
+        import os
+        runtime_env = self._config.runtime_env
+        if not runtime_env:
+            return
+        env_vars = runtime_env.get("env_vars")
+        if isinstance(env_vars, dict):
+            for key, value in env_vars.items():
+                # os.environ only accepts strings; YAML/JSON configs may carry
+                # numeric or boolean keys and values, so coerce both.
+                os.environ[str(key)] = str(value)
+    @property
+    def config(self) -> ServiceConfig:
+        """
+        Return the service configuration.
+        Raises:
+            ValueError: If the service was never given a configuration.
+        """
+        if self._config is None:
+            raise ValueError("Service not initialized with config")
+        return self._config
+    @property
+    def logger(self):
+        """Return the service logger, creating it on first access if needed."""
+        if self._logger is None:
+            self._setup_logger()
+        return self._logger
+    @property
+    def params(self) -> Dict[str, Any]:
+        """Return the ``params`` dictionary of the configuration."""
+        return self.config.params
+    @abstractmethod
+    async def process(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Run the business logic; every subclass must implement this.
+        Args:
+            data: Input data (already passed through ``pre_process`` when
+                invoked via ``handle_request``).
+        Returns:
+            The processing result.
+        """
+        pass
+    async def pre_process(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Pre-processing hook; subclasses may override it.
+        Args:
+            data: Raw input data.
+        Returns:
+            The data to hand to ``process``. The default returns ``data`` unchanged.
+        """
+        return data
+    async def post_process(self, data: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Post-processing hook; subclasses may override it.
+        Args:
+            data: The data that was passed to ``process``.
+            result: The value returned by ``process``.
+        Returns:
+            The final result. The default returns ``result`` unchanged.
+        """
+        return result
+    @staticmethod
+    def _to_log_json(payload: Any) -> str:
+        """
+        Render ``payload`` for a log line without ever raising.
+        JSON-serialisable payloads are rendered exactly as ``json.dumps`` with
+        ``ensure_ascii=False`` would; other objects fall back to ``str``/``repr``.
+        """
+        import json
+        try:
+            return json.dumps(payload, ensure_ascii=False, default=str)
+        except (TypeError, ValueError):
+            # e.g. non-string dict keys or circular references
+            return repr(payload)
+    def _format_result(
+        self,
+        code: int,
+        message: str,
+        output: Optional[Dict[str, Any]] = None,
+        processing_time: Optional[float] = None
+    ) -> Dict[str, Any]:
+        """
+        Build the uniform result dictionary.
+        Args:
+            code: Return code; 0 means success, non-zero means failure.
+            message: Return message.
+            output: The user program's output.
+            processing_time: Processing time in seconds; the key is omitted
+                from the result when this is None.
+        Returns:
+            A dict with ``code``, ``message`` and ``data`` (workflow
+            identifiers from the config plus ``out_put``), and optionally
+            ``processing_time``.
+        """
+        cfg = self._config
+        result = {
+            "code": code,
+            "message": message,
+            "data": {
+                "work_flow_id": cfg.work_flow_id,
+                "work_flow_instance_id": cfg.work_flow_instance_id,
+                "task_id": cfg.task_id,
+                "out_put": output
+            }
+        }
+        if processing_time is not None:
+            result["processing_time"] = processing_time
+        return result
+    async def handle_request(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Run the full request pipeline: pre_process -> process -> post_process.
+        Args:
+            data: Input data.
+        Returns:
+            The formatted result: ``code=0`` with the final output on success,
+            ``code=-1`` with the error message when any stage raises. The
+            exception itself is logged and never propagated.
+        """
+        # perf_counter is monotonic, so the measured duration cannot be skewed
+        # by wall-clock adjustments during the request.
+        started = time.perf_counter()
+        try:
+            # Log the input. Serialisation is best-effort so that payloads
+            # which are not JSON-serialisable cannot fail the request.
+            self.logger.info(
+                f"Processing request - Input: {self._to_log_json(data)}",
+                extra={"input_data": data}
+            )
+            processed_data = await self.pre_process(data)
+            result = await self.process(processed_data)
+            final_result = await self.post_process(processed_data, result)
+            processing_time = time.perf_counter() - started
+            # Same best-effort serialisation: a successful request must not be
+            # reported as failed merely because its result could not be logged.
+            self.logger.info(
+                f"Request processed successfully - Result: {self._to_log_json(final_result)}, Processing time: {processing_time:.3f}s",
+                extra={"result": final_result, "processing_time": processing_time}
+            )
+            return self._format_result(
+                code=0,
+                message="success",
+                output=final_result,
+                processing_time=processing_time
+            )
+        except Exception as e:
+            # Record the elapsed time even when a stage failed.
+            processing_time = time.perf_counter() - started
+            # Some exceptions carry no message; fall back to the type name so
+            # callers never receive an empty error message.
+            error_msg = str(e) or type(e).__name__
+            self.logger.error(
+                f"Error processing request: {error_msg}, Processing time: {processing_time:.3f}s",
+                extra={"error": error_msg, "error_type": type(e).__name__, "processing_time": processing_time}
+            )
+            return self._format_result(
+                code=-1,
+                message=error_msg,
+                output=None,
+                processing_time=processing_time
+            )

hello_datap_component_base/cli.py ADDED Viewed

@@ -0,0 +1,276 @@
+import click
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+from .base import ServiceConfig
+from .runner import ServiceRunner
+from .discover import find_service_classes, get_single_service_class
+from .config import ServerConfig
+@click.group()
+# Version printed by `--version`. It must match the released distribution
+# (this wheel is 0.2.0); it was left at the stale value 0.1.9.
+@click.version_option(version="0.2.0")
+def cli():
+    """数据处理平台组件基类 - 统一的服务管理框架"""
+    # Root command group. click prints the docstring above as the --help text,
+    # so it is user-facing output and is deliberately left unchanged.
+    pass
+@cli.command()
+@click.argument("config_path")
+@click.option("--class-name", "-c", help="指定要使用的服务类名")
+def start(config_path: str, class_name: Optional[str] = None):
+    """
+    启动服务并执行一次处理（支持本地文件路径或HTTP URL）
+    输入数据从配置文件的 params.input_data 中获取。
+    如果 params.input_data 不存在，将使用默认测试数据。
+    """
+    # The docstring above is the command's --help text (runtime output), so it
+    # is kept verbatim.
+    # Build a runner for the given config (and optional service class name)
+    # and execute it.
+    ServiceRunner(config_path, class_name).run()
+@cli.command()
+def init():
+    """初始化示例项目"""
+    # The docstring above is the command's --help text (runtime output), so it
+    # is kept verbatim.
+    # Scaffolds ./example_service with config.json, service.py and README.md.
+    # Existing files with those names are overwritten.
+    import textwrap
+    example_dir = Path("example_service")
+    example_dir.mkdir(exist_ok=True)
+    # Sample configuration file
+    config = {
+        "name": "example-service",
+        "version": "1.0.0",
+        "runtime_env": {
+            "pip": ["requests>=2.25.0"],
+            "env_vars": {
+                "LOG_LEVEL": "INFO",
+                "ENV": "development"
+            }
+        },
+        "params": {
+            "example_param": "value"
+        }
+    }
+    config_file = example_dir / "config.json"
+    with open(config_file, "w", encoding="utf-8") as f:
+        json.dump(config, f, indent=2, ensure_ascii=False)
+    # Sample service implementation (written to disk verbatim)
+    example_code = '''#!/usr/bin/env python3
+"""
+示例服务
+"""
+import asyncio
+from hello_datap_component_base import BaseService
+class ExampleService(BaseService):
+    """示例服务实现"""
+    async def process(self, data: dict) -> dict:
+        """处理请求的业务逻辑"""
+        self.logger.info(f"收到请求数据: {data}")
+        # 模拟处理逻辑
+        await asyncio.sleep(0.1)
+        # 返回结果
+        result = {
+            "status": "success",
+            "message": f"Hello, {data.get('name', 'World')}!",
+            "processed_data": {
+                    "original": data,
+                "extra_info": self.params
+            },
+            "timestamp": asyncio.get_event_loop().time()
+        }
+        return result
+if __name__ == "__main__":
+    # 本地测试
+    import json
+    from hello_datap_component_base import ServiceConfig
+    async def test():
+        config = ServiceConfig(
+            name="test-example",
+            params={"test": "value"}
+        )
+        service = ExampleService(config)
+        # 测试请求
+        result = await service.process({"name": "Test User"})
+        print(json.dumps(result, indent=2, ensure_ascii=False))
+    asyncio.run(test())
+'''
+    code_file = example_dir / "service.py"
+    with open(code_file, "w", encoding="utf-8") as f:
+        f.write(example_code)
+    # README. The template is indented to match this function, so it is
+    # dedented before writing; otherwise every line would start with four
+    # spaces and Markdown would render the headings and lists as a code block.
+    readme = textwrap.dedent(f"""\
+        # 示例服务
+        这是一个通过 component_manager 创建的服务示例。
+        ## 文件结构
+        - `config.json` - 服务配置文件
+        - `service.py` - 服务实现代码
+        ## 启动服务
+        ```bash
+        component_manager start config.json
+        ```
+        ## 本地测试
+        ```bash
+        cd {example_dir}
+        python service.py
+        ```
+        ## 配置说明
+        配置文件包含以下主要部分：
+        1. **name**: 服务名称
+        2. **version**: 服务版本（可选）
+        3. **runtime_env**: 运行时环境
+           - pip: Python 依赖包
+           - env_vars: 环境变量
+        4. **params**: 服务参数
+        """)
+    readme_file = example_dir / "README.md"
+    with open(readme_file, "w", encoding="utf-8") as f:
+        f.write(readme)
+    # Tell the user what was created and how to run it.
+    click.echo(f"✅ 示例项目已创建在: {example_dir}")
+    click.echo(f"📁 配置文件: {config_file}")
+    click.echo(f"🐍 示例代码: {code_file}")
+    click.echo("\n启动服务:")
+    click.echo(f"  cd {example_dir}")
+    click.echo("  component_manager start config.json")
+@cli.command()
+@click.option("--json", "-j", "json_format", is_flag=True, help="JSON 格式输出")
+def list(json_format: bool = False):
+    """列出可用的服务类"""
+    # The docstring above is the command's --help text (runtime output), so it
+    # is kept verbatim.
+    # Discovers service classes under the current working directory and prints
+    # them as JSON (--json) or as a numbered list. Any error is reported on
+    # stderr and the process exits with status 1.
+    try:
+        import os
+        # Always search the current working directory.
+        found = find_service_classes(os.getcwd())
+        if json_format:
+            entries = []
+            for module, cls in found:
+                entries.append({
+                    "module": module,
+                    "class": cls.__name__,
+                    "file": getattr(cls, "__module__", "unknown"),
+                })
+            click.echo(json.dumps(entries, indent=2, ensure_ascii=False))
+        elif found:
+            click.echo("📋 发现的服务类:")
+            for position, (module, cls) in enumerate(found, 1):
+                click.echo(f"{position}. {cls.__name__} (来自 {module})")
+        else:
+            # Nothing found: print the likely causes and some hints.
+            for line in (
+                "❌ 未找到服务类",
+                "\n可能的原因：",
+                "  1. 当前目录下没有继承自 BaseService 的服务类",
+                "  2. 服务类文件导入失败（可能是缺少依赖包）",
+                "  3. 服务类未实现 process 方法",
+                "\n提示：",
+                "  - 检查是否有 example_service.py 等文件",
+                "  - 如果服务类需要额外的包，请在配置文件的 runtime_env.pip 中指定",
+                "  - 运行命令时查看上方的警告信息",
+            ):
+                click.echo(line)
+    except Exception as exc:
+        click.echo(f"❌ 错误: {exc}", err=True)
+        sys.exit(1)
+@cli.command()
+@click.argument("config_path")
+def validate(config_path: str):
+    """验证配置文件（支持本地文件路径或HTTP URL）"""
+    # The docstring above is the command's --help text (runtime output), so it
+    # is kept verbatim.
+    # Loads the config through ServerConfig.from_file and echoes it back as
+    # JSON; any failure is reported on stderr with exit status 1.
+    try:
+        loaded = ServerConfig.from_file(config_path)
+        click.echo("✅ 配置文件有效")
+        rendered = json.dumps(loaded.to_dict(), indent=2, ensure_ascii=False)
+        click.echo(rendered)
+    except Exception as exc:
+        click.echo(f"❌ 配置文件无效: {exc}", err=True)
+        sys.exit(1)
+@cli.command()
+@click.argument("config_path")
+@click.argument("data", required=False)
+@click.option("--file", "-f", type=click.File("r"), help="从文件读取请求数据")
+def test(config_path: str, data: Optional[str] = None, file: Optional[click.File] = None):
+    """测试服务（发送测试请求，支持本地文件路径或HTTP URL）"""
+    # The docstring above is the command's --help text (runtime output), so it
+    # is kept verbatim.
+    # Loads the config, finds the single service class under the current
+    # working directory, builds a request payload and calls the service's
+    # ``process`` coroutine directly with it.
+    try:
+        server_cfg = ServerConfig.from_file(config_path)
+        import os
+        service_cls = get_single_service_class(search_path=os.getcwd())
+        # Payload precedence: --file, then the DATA argument, then a default.
+        if file:
+            payload = json.load(file)
+        elif data:
+            try:
+                payload = json.loads(data)
+            except json.JSONDecodeError:
+                # Not JSON: pass the raw string through under the "data" key.
+                payload = {"data": data}
+        else:
+            payload = {"test": "default"}
+        # Convert the runtime environment to a plain dict; an empty dict is
+        # normalised to None.
+        env_dict = None
+        if server_cfg.runtime_env:
+            env_dict = server_cfg.runtime_env.model_dump(exclude_none=True) or None
+        service = service_cls(
+            ServiceConfig(
+                name=server_cfg.name + "-test",
+                version=server_cfg.version,
+                params=server_cfg.params,
+                runtime_env=env_dict
+            )
+        )
+        # Send the test request.
+        import asyncio
+        outcome = asyncio.run(service.process(payload))
+        click.echo("✅ 测试结果:")
+        click.echo(json.dumps(outcome, indent=2, ensure_ascii=False))
+    except Exception as exc:
+        click.echo(f"❌ 测试失败: {exc}", err=True)
+        sys.exit(1)
+def main():
+    """Main entry point: invoke the click command group."""
+    cli()
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    main()

hello_datap_component_base/config.py ADDED Viewed

@@ -0,0 +1,169 @@
+import json
+import os
+import ssl
+import base64
+from pathlib import Path
+from typing import Dict, Any, Optional
+from pydantic import BaseModel, Field, ValidationError, ConfigDict
+import yaml
+import urllib.request
+import urllib.parse
+class RuntimeEnv(BaseModel):
+    """Runtime environment configuration."""
+    # Optional list of pip requirements (e.g. "requests>=2.25.0" in the sample config).
+    pip: Optional[list] = None
+    # Optional conda settings; the structure is not constrained here.
+    conda: Optional[dict] = None
+    # Optional environment variables as a string-to-string mapping.
+    env_vars: Optional[Dict[str, str]] = None
+    # Keys not declared above are ignored instead of rejected.
+    model_config = ConfigDict(extra="ignore")
+class ServerConfig(BaseModel):
+    """Service configuration that can be loaded from a local file or an HTTP(S) URL."""
+    name: str = Field(..., description="服务名称")
+    version: Optional[str] = Field(None, description="服务版本")
+    runtime_env: Optional[RuntimeEnv] = None
+    params: Dict[str, Any] = Field(default_factory=dict)
+    work_flow_id: Optional[int] = Field(None, description="工作流ID")
+    work_flow_instance_id: Optional[int] = Field(None, description="工作流实例ID")
+    task_id: Optional[str] = Field(None, description="任务ID")
+    # Keys not declared above are ignored instead of rejected.
+    model_config = ConfigDict(extra="ignore")
+    @classmethod
+    def from_file(cls, config_path: str) -> "ServerConfig":
+        """
+        Load a configuration from a local file or an HTTP(S) URL.
+        Args:
+            config_path: Local file path (.json/.yaml/.yml), an HTTP(S) URL
+                returning JSON, or a base64-encoded HTTP(S) URL.
+        Returns:
+            A ServerConfig instance.
+        Raises:
+            FileNotFoundError: If the local file does not exist.
+            ValueError: If the URL cannot be fetched, the content cannot be
+                parsed, the file extension is unsupported, or the top-level
+                content is not a mapping.
+        """
+        # A base64-encoded URL is transparently replaced by the decoded URL.
+        decoded_path = cls._decode_base64_url(config_path)
+        if decoded_path:
+            config_path = decoded_path
+        parsed = urllib.parse.urlparse(config_path)
+        if parsed.scheme in ('http', 'https'):
+            data = cls._load_remote(config_path, parsed.scheme)
+        else:
+            data = cls._load_local(config_path)
+        # An empty YAML file yields None and a JSON document may be a list;
+        # report that clearly instead of failing on ``data.get`` below.
+        if not isinstance(data, dict):
+            raise ValueError(
+                f"Config from {config_path} must be a mapping at the top level, "
+                f"got {type(data).__name__}"
+            )
+        # A non-empty runtime_env dict becomes a RuntimeEnv object; an empty
+        # dict is normalised to None. Any other value is left for validation.
+        runtime_env = data.get("runtime_env")
+        if isinstance(runtime_env, dict):
+            data["runtime_env"] = RuntimeEnv(**runtime_env) if runtime_env else None
+        return cls(**data)
+    @staticmethod
+    def _ssl_error_message(url: str, error: Exception) -> str:
+        """Build the troubleshooting message shown for TLS certificate failures."""
+        return (
+            f"SSL certificate verification failed for URL {url}.\n"
+            f"Error: {error}\n\n"
+            f"Solutions:\n"
+            f"1. For internal services, set environment variable: export SKIP_SSL_VERIFY=true\n"
+            f"2. Install the CA certificate bundle\n"
+            f"3. Use HTTP instead of HTTPS if security is not required"
+        )
+    @classmethod
+    def _load_remote(cls, url: str, scheme: str) -> Any:
+        """Fetch ``url`` (30 s timeout) and parse the response body as JSON."""
+        # SECURITY: SKIP_SSL_VERIFY disables certificate and hostname checks.
+        # It is intended for internal services only.
+        skip_ssl_verify = os.environ.get('SKIP_SSL_VERIFY', 'false').lower() in ('true', '1', 'yes')
+        ssl_context = None
+        if scheme == 'https' and skip_ssl_verify:
+            ssl_context = ssl.create_default_context()
+            ssl_context.check_hostname = False
+            ssl_context.verify_mode = ssl.CERT_NONE
+        try:
+            # context=None is urlopen's default, i.e. normal verification.
+            with urllib.request.urlopen(url, timeout=30, context=ssl_context) as response:
+                content = response.read().decode('utf-8')
+            return json.loads(content)
+        except ssl.SSLError as e:
+            raise ValueError(cls._ssl_error_message(url, e)) from e
+        except urllib.error.URLError as e:
+            # urlopen wraps connection-time TLS failures in URLError, so the
+            # SSLError has to be looked up in ``reason`` for the helpful
+            # message to be shown at all.
+            if isinstance(getattr(e, "reason", None), ssl.SSLError):
+                raise ValueError(cls._ssl_error_message(url, e.reason)) from e
+            raise ValueError(f"Failed to load config from URL {url}: {e}") from e
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON in config from URL {url}: {e}") from e
+    @staticmethod
+    def _load_local(config_path: str) -> Any:
+        """Read a local config file, choosing the parser by file extension."""
+        path = Path(config_path)
+        if not path.exists():
+            raise FileNotFoundError(f"Config file not found: {config_path}")
+        suffix = path.suffix.lower()
+        if suffix == '.json':
+            loader = json.load
+        elif suffix in ('.yaml', '.yml'):
+            loader = yaml.safe_load
+        else:
+            raise ValueError(f"Unsupported config file format: {path.suffix}")
+        with open(path, 'r', encoding='utf-8') as f:
+            return loader(f)
+    @staticmethod
+    def _decode_base64_url(config_path: str) -> Optional[str]:
+        """
+        Try to decode a base64-encoded URL.
+        Args:
+            config_path: A string that may be a base64-encoded URL.
+        Returns:
+            The decoded URL when the input is valid base64 that decodes to an
+            http/https URL; otherwise None.
+        """
+        # Too short to be an encoded URL.
+        if len(config_path) < 10:
+            return None
+        # Plain URLs and obvious filesystem paths are never base64.
+        if config_path.startswith(('http://', 'https://', '/', './', '../')):
+            return None
+        # Only the standard base64 alphabet (plus whitespace) is accepted.
+        base64_chars = set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
+        if not all(c in base64_chars or c.isspace() for c in config_path):
+            return None
+        try:
+            decoded_bytes = base64.b64decode(config_path.strip(), validate=True)
+            decoded_str = decoded_bytes.decode('utf-8')
+            parsed = urllib.parse.urlparse(decoded_str)
+        except ValueError:
+            # binascii.Error (invalid base64) and UnicodeDecodeError are both
+            # ValueError subclasses, and urlparse raises ValueError on
+            # malformed input: the string is not an encoded URL.
+            return None
+        # Anything that does not decode to an http/https URL is treated as a
+        # false positive (e.g. a file name made only of base64 characters).
+        if parsed.scheme in ('http', 'https'):
+            return decoded_str
+        return None
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert the configuration to a plain dict, omitting None values.
+        ``runtime_env`` is left out when it serialises to an empty dict.
+        """
+        # model_dump already serialises the nested runtime_env with None
+        # values excluded; only the empty case needs handling here.
+        data = self.model_dump(exclude_none=True)
+        if not data.get("runtime_env"):
+            data.pop("runtime_env", None)
+        return data