PyPI - PyREUser3 - Versions diffs - 0.1.0__py3-none-any.whl - Mend

PyREUser3 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

pyreuser3/__init__.py +65 -0
pyreuser3/__main__.py +7 -0
pyreuser3/api.py +410 -0
pyreuser3/cli.py +207 -0
pyreuser3/core.py +358 -0
pyreuser3/export/__init__.py +11 -0
pyreuser3/export/base.py +245 -0
pyreuser3/export/enums.py +171 -0
pyreuser3/export/fields.py +231 -0
pyreuser3/export/metadata.py +304 -0
pyreuser3/export/postprocess.py +346 -0
pyreuser3/export/tree.py +289 -0
pyreuser3/export/user3.py +415 -0
pyreuser3/pack/__init__.py +10 -0
pyreuser3/pack/base.py +281 -0
pyreuser3/pack/models.py +140 -0
pyreuser3/pack/plan.py +566 -0
pyreuser3/pack/writer.py +313 -0
pyreuser3/rich_ui.py +126 -0
pyreuser3/schema.py +193 -0
pyreuser3/web/__init__.py +6 -0
pyreuser3/web/__main__.py +6 -0
pyreuser3/web/handler.py +178 -0
pyreuser3/web/jobs.py +243 -0
pyreuser3/web/page.py +221 -0
pyreuser3/web/picker.py +92 -0
pyreuser3/web/runners.py +238 -0
pyreuser3/web/server.py +104 -0
pyreuser3/web/settings.py +42 -0
pyreuser3-0.1.0.dist-info/METADATA +165 -0
pyreuser3-0.1.0.dist-info/RECORD +35 -0
pyreuser3-0.1.0.dist-info/WHEEL +5 -0
pyreuser3-0.1.0.dist-info/entry_points.txt +3 -0
pyreuser3-0.1.0.dist-info/licenses/LICENSE +21 -0
pyreuser3-0.1.0.dist-info/top_level.txt +1 -0

pyreuser3/core.py ADDED Viewed

@@ -0,0 +1,358 @@
+"""`.user.3` 解析与封包共享的基础设施。
+这里放置不依赖具体导出器/封包器的通用能力：magic 默认值、二进制读取、
+字段与类型定义、RE_RSZ 模板加载、字符串/GUID 规范化等。
+"""
+from __future__ import annotations
+import re
+import struct
+import uuid
+from pathlib import Path
+from typing import Any
+# Default magic of the outermost USR header of a `.user.3` file (little-endian "USR\0").
+USR_MAGIC = 5395285
+# Default magic of the embedded RSZ data block (little-endian "RSZ\0").
+RSZ_MAGIC = 5919570
+# Format identifier of the full-instance-table pack JSON; used to recognise documents that can be packed back reliably.
+PACK_JSON_FORMAT = "re_user3_pack_v1"
+# Matches a 32-character hexadecimal string without separators (used to recognise GUID text).
+HEX32_RE = re.compile(r"^[0-9a-fA-F]{32}$")
+# Placeholder field name that must be ignored inside enum types of a REFramework dump.
+ENUM_UNUSED_KEY = "value__"
+class ParseError(RuntimeError):
+    """Raised when binary data does not match the expected structure.
+    Used by both the parsing and the packing paths. It derives from
+    :class:`RuntimeError` so that foreseeable "the data is malformed" failures
+    can be told apart from programming errors, which lets batch callers catch
+    them and count failed files separately.
+    """
+def align(value: int, alignment: int) -> int:
+    """Round an integer offset up to the next multiple of ``alignment``.
+    Args:
+        value (int): Current offset.
+        alignment (int): Alignment granularity. Values less than or equal to 1
+            leave ``value`` untouched.
+    Returns:
+        int: The smallest multiple of ``alignment`` that is >= ``value``.
+    """
+    if alignment <= 1:
+        return value
+    # Ceiling division rather than the ``(v + a - 1) & ~(a - 1)`` bit trick:
+    # the mask is only correct when ``alignment`` is a power of two, whereas
+    # this form yields the same result for those and stays correct for any
+    # other granularity (e.g. 3 or 12).
+    return -(-value // alignment) * alignment
+def format_guid_text_from_hex32(hex32: str) -> str:
+    """Format 32 hexadecimal characters as canonical GUID text.
+    Args:
+        hex32 (str): 32-character hexadecimal string without separators.
+    Returns:
+        str: Lower-case GUID text shaped ``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``.
+    """
+    lowered = hex32.lower()
+    # Cut points of the standard 8-4-4-4-12 grouping.
+    bounds = (0, 8, 12, 16, 20, 32)
+    groups = [lowered[start:stop] for start, stop in zip(bounds, bounds[1:])]
+    return "-".join(groups)
+def normalize_guid_candidate_text(text: str) -> str:
+    """Normalise a string to canonical GUID text when it looks like a GUID.
+    Args:
+        text (str): Raw string, possibly wrapped in ``{}`` and/or containing
+            ``-`` separators.
+    Returns:
+        str: Canonical GUID text when the input is recognised as a GUID,
+        otherwise the input exactly as it was passed in.
+    """
+    # Trim surrounding whitespace and braces, then drop hyphens so that only a
+    # bare hexadecimal candidate remains.
+    candidate = text.strip().strip("{}").replace("-", "")
+    if HEX32_RE.fullmatch(candidate) is None:
+        return text
+    return format_guid_text_from_hex32(candidate)
+def resolve_schema_path(schema_path_or_dir: str | Path) -> Path:
+    """Validate and return the explicitly supplied RE_RSZ template file path.
+    Every dependency file has to be passed in explicitly, so directories are
+    rejected on purpose: this avoids silently picking the wrong file when the
+    templates of several games live side by side.
+    Args:
+        schema_path_or_dir (str | Path): Path expected to point at a concrete
+            template JSON file.
+    Returns:
+        Path: The validated template file path.
+    Raises:
+        FileNotFoundError: If the path is a directory or does not exist.
+    """
+    candidate = Path(schema_path_or_dir)
+    if not candidate.is_file():
+        if candidate.is_dir():
+            message = f"schema must be an explicit RE RSZ json file, not a directory: {candidate}"
+        else:
+            message = f"schema file not found: {candidate}"
+        raise FileNotFoundError(message)
+    return candidate
+class BinaryReader:
+    """Bounds-checked little-endian binary reader.
+    Wraps a read-only byte buffer together with a read cursor and offers
+    readers for fixed-width integers, floats and strings. Cursor-based reads
+    that would leave the buffer raise :class:`ParseError`, so a wrong template
+    cannot push later parsing into garbage.
+    """
+    def __init__(self, data: bytes):
+        """Create a reader positioned at the start of ``data``.
+        Args:
+            data (bytes): Source buffer; it is never modified by the reader.
+        """
+        self.data = data
+        # Absolute read cursor, relative to the start of the buffer.
+        self.pos = 0
+    @property
+    def size(self) -> int:
+        """Total length of the buffer.
+        Returns:
+            int: Number of bytes in the source buffer.
+        """
+        return len(self.data)
+    def tell(self) -> int:
+        """Return the current read cursor.
+        Returns:
+            int: Absolute offset from the start of the buffer.
+        """
+        return self.pos
+    def seek(self, pos: int) -> None:
+        """Move the cursor to an absolute offset.
+        Args:
+            pos (int): Target offset; must lie within ``[0, size]``.
+        Returns:
+            None: Only the internal cursor is updated.
+        Raises:
+            ParseError: If the target offset is outside the buffer.
+        """
+        if pos < 0 or pos > self.size:
+            raise ParseError(f"seek out of range: {pos}")
+        self.pos = pos
+    def read(self, n: int) -> bytes:
+        """Read ``n`` bytes and advance the cursor.
+        Args:
+            n (int): Number of bytes to read; must not be negative.
+        Returns:
+            bytes: The bytes read, exactly ``n`` long.
+        Raises:
+            ParseError: If ``n`` is negative or fewer than ``n`` bytes remain.
+        """
+        # A negative length would pass the upper-bound check below, return an
+        # empty slice and silently move the cursor backwards, so reject it.
+        if n < 0:
+            raise ParseError(f"negative read length: {n}")
+        end = self.pos + n
+        if end > self.size:
+            raise ParseError(f"read out of range: {self.pos}+{n}")
+        out = self.data[self.pos : end]
+        self.pos = end
+        return out
+    def read_struct(self, fmt: str) -> Any:
+        """Read and unpack one value described by a ``struct`` format.
+        Args:
+            fmt (str): Format string for :func:`struct.unpack`; it should
+                describe a single value because only the first unpacked item
+                is returned.
+        Returns:
+            Any: The unpacked value; its type depends on ``fmt``.
+        """
+        size = struct.calcsize(fmt)
+        raw = self.read(size)
+        return struct.unpack(fmt, raw)[0]
+    def read_u8(self) -> int:
+        """Read an unsigned 8-bit integer (1 byte).
+        Returns:
+            int: Value in the range 0..255.
+        """
+        return self.read_struct("<B")
+    def read_s8(self) -> int:
+        """Read a signed 8-bit integer (1 byte).
+        Returns:
+            int: Value in the range -128..127.
+        """
+        return self.read_struct("<b")
+    def read_u16(self) -> int:
+        """Read an unsigned 16-bit integer (2 bytes, little-endian).
+        Returns:
+            int: Value in the range 0..65535.
+        """
+        return self.read_struct("<H")
+    def read_s16(self) -> int:
+        """Read a signed 16-bit integer (2 bytes, little-endian).
+        Returns:
+            int: Value in the range -32768..32767.
+        """
+        return self.read_struct("<h")
+    def read_u32(self) -> int:
+        """Read an unsigned 32-bit integer (4 bytes, little-endian).
+        Returns:
+            int: Unsigned 32-bit value.
+        """
+        return self.read_struct("<I")
+    def read_s32(self) -> int:
+        """Read a signed 32-bit integer (4 bytes, little-endian).
+        Returns:
+            int: Signed 32-bit value.
+        """
+        return self.read_struct("<i")
+    def read_u64(self) -> int:
+        """Read an unsigned 64-bit integer (8 bytes, little-endian).
+        Returns:
+            int: Unsigned 64-bit value.
+        """
+        return self.read_struct("<Q")
+    def read_s64(self) -> int:
+        """Read a signed 64-bit integer (8 bytes, little-endian).
+        Returns:
+            int: Signed 64-bit value.
+        """
+        return self.read_struct("<q")
+    def read_f32(self) -> float:
+        """Read a 32-bit single-precision float (4 bytes, little-endian).
+        Returns:
+            float: The decoded value.
+        """
+        return self.read_struct("<f")
+    def read_f64(self) -> float:
+        """Read a 64-bit double-precision float (8 bytes, little-endian).
+        Returns:
+            float: The decoded value.
+        """
+        return self.read_struct("<d")
+    def read_wstring_null(self, offset: int) -> str:
+        """Read a NUL-terminated UTF-16LE string at an absolute offset.
+        Independent of the cursor: the string is read at ``offset`` and
+        ``self.pos`` is left unchanged, which suits strings that are referenced
+        by offset from elsewhere (such as a header path table).
+        Args:
+            offset (int): Absolute offset of the first code unit.
+        Returns:
+            str: Decoded text passed through ``normalize_guid_candidate_text``;
+            an empty string when ``offset`` is outside the buffer.
+        """
+        if offset < 0 or offset >= self.size:
+            return ""
+        # Scan whole 16-bit units until a zero unit or the end of the buffer;
+        # a trailing odd byte is ignored.
+        end = offset
+        while end + 1 < self.size:
+            if self.data[end] == 0 and self.data[end + 1] == 0:
+                break
+            end += 2
+        # Decode the slice as UTF-16LE instead of mapping each code unit through
+        # ``chr()``: that way surrogate pairs become one real character, and an
+        # unpaired surrogate becomes U+FFFD rather than a lone surrogate that
+        # cannot be encoded when the result is later written out as UTF-8.
+        text = bytes(self.data[offset:end]).decode("utf-16-le", errors="replace")
+        return normalize_guid_candidate_text(text)
+def read_len_utf16(reader: BinaryReader) -> str:
+    """Read a UTF-16LE string prefixed by a 4-byte length.
+    Args:
+        reader (BinaryReader): Reader to consume from, starting at its cursor.
+    Returns:
+        str: Decoded text with trailing NUL characters removed and GUID-like
+        text normalised; an empty string when the length is zero or
+        implausible.
+    """
+    # The length prefix sits on a 4-byte boundary.
+    aligned = align(reader.tell(), 4)
+    reader.seek(aligned)
+    char_count = reader.read_u32()
+    if not char_count:
+        return ""
+    bytes_left = reader.size - reader.tell()
+    # An implausible length yields an empty string instead of reading past the
+    # buffer and derailing the rest of the parse.
+    if char_count > 2_000_000 or char_count > bytes_left // 2:
+        return ""
+    text = reader.read(2 * char_count).decode("utf-16-le", errors="replace")
+    return normalize_guid_candidate_text(text.rstrip("\x00"))
+def read_len_c8(reader: BinaryReader) -> str:
+    """Read a UTF-8/C8 string prefixed by a 4-byte length.
+    Args:
+        reader (BinaryReader): Reader to consume from, starting at its cursor.
+    Returns:
+        str: Decoded text with trailing NUL characters removed and GUID-like
+        text normalised; an empty string when the length is zero or
+        implausible.
+    """
+    # The length prefix sits on a 4-byte boundary.
+    aligned = align(reader.tell(), 4)
+    reader.seek(aligned)
+    byte_count = reader.read_u32()
+    if not byte_count:
+        return ""
+    bytes_left = reader.size - reader.tell()
+    # Give up with an empty string when the length cannot possibly be valid.
+    if byte_count > 2_000_000 or byte_count > bytes_left:
+        return ""
+    text = reader.read(byte_count).decode("utf-8", errors="replace")
+    return normalize_guid_candidate_text(text.rstrip("\x00"))
+def read_guid_like(reader: BinaryReader) -> str:
+    """Read 16 bytes of GUID data and return canonical GUID text.
+    Args:
+        reader (BinaryReader): Reader to consume 16 bytes from, starting at its
+            cursor.
+    Returns:
+        str: Canonical GUID text; if the bytes cannot be interpreted as a UUID
+        the raw bytes are formatted as hexadecimal groups instead.
+    """
+    guid_bytes = reader.read(16)
+    try:
+        # The bytes are interpreted with the little-endian field layout.
+        parsed = uuid.UUID(bytes_le=guid_bytes)
+    except Exception:
+        return format_guid_text_from_hex32(guid_bytes.hex())
+    return str(parsed)

pyreuser3/export/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""导出功能子包。
+这个子包负责把 RE Engine 的 `.user.3` 二进制数据库解析成 JSON。
+外部调用通常不需要关心内部拆分，直接从 `pyreuser3` 导入
+`User3Exporter` 即可；保留这个子包入口是为了方便后续继续扩展
+解析链路。
+"""
+from .base import User3Exporter
+__all__ = ["User3Exporter"]

pyreuser3/export/base.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""`.user.3` 到 JSON 的导出器入口。
+`User3Exporter` 通过多个 Mixin 组合出完整的解析链路：读取 USR/RSZ 结构、
+解析字段、构建对象引用树、应用枚举元数据并做后处理。本文件只负责装配
+这些能力、管理批处理流程，以及处理文件发现与输出路径计算。
+"""
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from .enums import ExporterEnumSourceMixin
+from .fields import ExporterFieldParserMixin
+from .metadata import ExporterMetadataMixin
+from .postprocess import ExporterPostprocessMixin
+from .tree import ExporterTreeMixin
+from .user3 import ExporterUser3ParserMixin
+from ..core import RSZ_MAGIC, USR_MAGIC, resolve_schema_path
+from ..rich_ui import BatchProgress
+from ..schema import TypeDB
+class User3Exporter(
+    ExporterEnumSourceMixin,
+    ExporterMetadataMixin,
+    ExporterPostprocessMixin,
+    ExporterTreeMixin,
+    ExporterFieldParserMixin,
+    ExporterUser3ParserMixin,
+):
+    """把 RE Engine `.user.3` 二进制文件导出为紧凑 JSON。
+    通过组合枚举源、元数据、后处理、对象树、字段解析和 USR/RSZ 解析等
+    Mixin，提供从单文件解析到批量导出的完整能力。
+    """
+    def __init__(
+        self,
+        user3_root: str | Path,
+        schema_dir: str | Path,
+        output_root: str | Path,
+        tree_depth: int | str = "auto",
+        exclude_regexes: list[str] | None = None,
+        il2cpp_dump_path: str | Path = "",
+        user_magic: int = USR_MAGIC,
+        rsz_magic: int = RSZ_MAGIC,
+    ):
+        """Store the exporter configuration and prepare the run-time indexes.
+        Args:
+            user3_root (str | Path): Input root directory or a single
+                ``.user.3`` file.
+            schema_dir (str | Path): Explicit path of the RE_RSZ template JSON
+                file.
+            output_root (str | Path): Root directory for the JSON output.
+            tree_depth (int | str): Expansion depth of the object reference
+                tree; a non-negative integer or ``"auto"``.
+            exclude_regexes (list[str] | None): Regular expressions used to
+                exclude files by relative path.
+            il2cpp_dump_path (str | Path): Path of the mandatory
+                ``il2cpp_dump.json`` file.
+            user_magic (int): Magic expected in the USR file header.
+            rsz_magic (int): Magic expected in the RSZ block.
+        Returns:
+            None: The constructor only initialises instance attributes.
+        Raises:
+            FileNotFoundError: If ``il2cpp_dump.json`` does not exist.
+        """
+        # Paths are converted to Path once here so that every later stage only
+        # ever deals with Path objects.
+        self.user3_root = Path(user3_root)
+        self.schema_dir = Path(schema_dir)
+        self.output_root = Path(output_root)
+        dump_path = Path(il2cpp_dump_path)
+        self.il2cpp_dump_path = dump_path
+        if not dump_path.is_file():
+            raise FileNotFoundError(f"il2cpp_dump.json not found: {dump_path}")
+        self.tree_depth = self._normalize_tree_depth(tree_depth)
+        self.user_magic = int(user_magic)
+        self.rsz_magic = int(rsz_magic)
+        self.exclude_regexes = exclude_regexes if exclude_regexes else []
+        self._exclude_patterns = list(map(re.compile, self.exclude_regexes))
+        self.schema_path = self._resolve_schema_path(self.schema_dir)
+        self.typedb = TypeDB.load(self.schema_path)
+        # The indexes below are built from il2cpp_dump.json before exporting.
+        # They turn fixed enum values into `[value] member name` and help infer
+        # the enum type of fields inside generic containers.
+        self.enum_lookup: dict[str, dict[int, tuple[str, int]]] = {}
+        self.class_field_fixed_types: dict[str, dict[str, str]] = {}
+        self.serializable_to_fixed: dict[str, str] = {}
+        self.generic_container_rules: dict[str, tuple[str, str]] = {}
+        self.param_type_default_enum: dict[str, str] = {}
+        self.enum_member_to_types: dict[str, list[str]] = {}
+    def run(self) -> dict[str, int]:
+        """Run the batch export.
+        Discovers the input files, builds the enum indexes, then exports the
+        files one by one while reporting progress through ``BatchProgress``.
+        Returns:
+            dict[str, int]: Counters under the keys ``total``, ``success`` and
+            ``failed``.
+        """
+        targets = self._discover_user3_files()
+        self.output_root.mkdir(parents=True, exist_ok=True)
+        # The enum table is regenerated from the explicitly supplied
+        # il2cpp_dump.json on every run; an Enums_Internal.json left over in an
+        # old directory is never reused, which avoids cross-game or
+        # cross-version contamination.
+        self.enum_lookup = self._build_enum_lookup_from_enums_internal(
+            self._ensure_internal_metadata_files()
+        )
+        self._load_enum_context_from_il2cpp_dump()
+        self._ensure_enum_lookup()
+        total = len(targets)
+        succeeded = 0
+        failures = 0
+        # A failing file is only counted; it never aborts the batch, which makes
+        # large resource sets easier to triage.
+        with BatchProgress("Exporting user3", total=total, unit="file") as progress:
+            progress.log(f"发现 {total} 个 .user.3 文件。")
+            progress.log(f"使用模板: {self.schema_path}")
+            progress.log(f"输出目录: {self.output_root}")
+            for source in targets:
+                progress.update(
+                    advance=0, description=source.name.replace(".user.3", "")
+                )
+                progress.log(f"开始导出 user3: {source}")
+                ok, output_path, error = self._export_one_file(source)
+                if not ok:
+                    failures += 1
+                    progress.log(f"user3 导出失败: {source} ({error})", style="red")
+                else:
+                    succeeded += 1
+                    progress.log(f"user3 导出完成: {output_path}", style="green")
+                progress.update(1)
+        return {"total": total, "success": succeeded, "failed": failures}
+    def _export_one_file(
+        self, user3_file: Path
+    ) -> tuple[bool, Path | None, str | None]:
+        """Export a single `.user.3` file to JSON.
+        Args:
+            user3_file (Path): Source ``.user.3`` file.
+        Returns:
+            tuple[bool, Path | None, str | None]: ``(succeeded, output path,
+            error text)``. On success the path is set and the error is
+            ``None``; on failure it is the other way round.
+        """
+        try:
+            # The raw tree goes through enum post-processing, then loses its
+            # internal indexes and value wrappers, and finally has its floats
+            # rounded slightly so the JSON is friendlier to edit by hand.
+            exported = self._round_export_floats(
+                self._finalize_export_tree(
+                    self._postprocess_enum_nodes(self._parse_user3(user3_file))
+                )
+            )
+            destination = self._output_path_for(user3_file)
+            destination.parent.mkdir(parents=True, exist_ok=True)
+            with destination.open("w", encoding="utf-8") as handle:
+                json.dump(exported, handle, ensure_ascii=False, indent=2)
+        except Exception as exc:
+            # Report the failure as short text for the batch statistics instead
+            # of raising, so one bad file cannot stop the whole batch.
+            return False, None, f"{exc.__class__.__name__}: {exc}"
+        return True, destination, None
+    def _resolve_schema_path(self, schema_dir: Path) -> Path:
+        """Validate the template path and hand it back.
+        Args:
+            schema_dir (Path): Historical parameter name; the value must be a
+                concrete template JSON file.
+        Returns:
+            Path: The validated template file path.
+        """
+        validated = resolve_schema_path(schema_dir)
+        return validated
+    def _normalize_tree_depth(self, tree_depth: int | str) -> int | str:
+        """Normalise the object-tree expansion depth.
+        Args:
+            tree_depth (int | str): Depth supplied by the caller, an integer or
+                the string ``"auto"`` (case and surrounding whitespace are
+                ignored).
+        Returns:
+            int | str: A non-negative integer or the string ``"auto"``.
+        Raises:
+            ValueError: If a string is not ``"auto"`` or an integer is negative.
+            TypeError: If the value is neither ``int`` nor ``str``.
+        """
+        if isinstance(tree_depth, str):
+            if tree_depth.strip().lower() == "auto":
+                return "auto"
+            raise ValueError("tree_depth must be a non-negative integer or 'auto'")
+        if not isinstance(tree_depth, int):
+            raise TypeError("tree_depth must be int or str")
+        if tree_depth < 0:
+            raise ValueError("tree_depth must be >= 0")
+        return tree_depth
+    def _discover_user3_files(self) -> list[Path]:
+        """Find the input `.user.3` files and apply the exclusion patterns.
+        Returns:
+            list[Path]: The ``.user.3`` files that survive filtering. In
+            directory mode they are sorted.
+        Raises:
+            FileNotFoundError: If the root does not exist, the directory holds
+                no ``*.user.3`` file, or every file was excluded.
+        """
+        # Query the filesystem once: the answer drives both the discovery
+        # branch and the way relative paths are built below, so there is no
+        # need to stat the root again for every file in the filter loop.
+        single_file = self.user3_root.is_file()
+        if single_file:
+            files = [self.user3_root]
+        else:
+            if not self.user3_root.is_dir():
+                raise FileNotFoundError(f"user3 root not found: {self.user3_root}")
+            files = sorted(self.user3_root.rglob("*.user.3"))
+            if not files:
+                raise FileNotFoundError(f"no *.user.3 found under: {self.user3_root}")
+        if not self._exclude_patterns:
+            return files
+        kept: list[Path] = []
+        for file_path in files:
+            # Directory mode matches against the POSIX-style path relative to
+            # the root, which makes it easy to exclude whole sub-directories;
+            # single-file mode matches against the bare file name.
+            if single_file:
+                rel_path = file_path.name
+            else:
+                rel_path = file_path.relative_to(self.user3_root).as_posix()
+            if any(pattern.search(rel_path) for pattern in self._exclude_patterns):
+                continue
+            kept.append(file_path)
+        if not kept:
+            raise FileNotFoundError("all *.user.3 files were excluded by regex filters")
+        return kept
+    def _output_path_for(self, user3_file: Path) -> Path:
+        """Work out where the JSON for one source file is written.
+        Args:
+            user3_file (Path): Source ``.user.3`` file.
+        Returns:
+            Path: Output JSON path. In directory mode the sub-directory of the
+            source relative to the input root is recreated under the output
+            root; in single-file mode the file goes directly into the output
+            root.
+        """
+        if self.user3_root.is_file():
+            target_dir = self.output_root
+        else:
+            target_dir = self.output_root / user3_file.relative_to(self.user3_root).parent
+        # The original file name is kept in full and ".json" is appended.
+        return target_dir / (user3_file.name + ".json")