epub-translator 0.1.0__tar.gz → 0.1.1__tar.gz

This diff compares two publicly released versions of the package as published to its registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (59)
  1. {epub_translator-0.1.0 → epub_translator-0.1.1}/PKG-INFO +1 -1
  2. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/core.py +95 -37
  3. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/translator.py +5 -2
  4. epub_translator-0.1.1/epub_translator/xml/xml_like.py +231 -0
  5. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/translator.py +48 -47
  6. {epub_translator-0.1.0 → epub_translator-0.1.1}/pyproject.toml +1 -1
  7. epub_translator-0.1.0/epub_translator/xml/xml_like.py +0 -176
  8. {epub_translator-0.1.0 → epub_translator-0.1.1}/LICENSE +0 -0
  9. {epub_translator-0.1.0 → epub_translator-0.1.1}/README.md +0 -0
  10. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/__init__.py +0 -0
  11. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/fill.jinja +0 -0
  12. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/README.md +0 -0
  13. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/cmarkup.xsl +0 -0
  14. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/entities.xsl +0 -0
  15. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/glayout.xsl +0 -0
  16. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/mmltex.xsl +0 -0
  17. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/scripts.xsl +0 -0
  18. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/tables.xsl +0 -0
  19. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/mmltex/tokens.xsl +0 -0
  20. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/data/translate.jinja +0 -0
  21. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/__init__.py +0 -0
  22. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/common.py +0 -0
  23. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/math.py +0 -0
  24. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/placeholder.py +0 -0
  25. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/spines.py +0 -0
  26. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/toc.py +0 -0
  27. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/epub/zip.py +0 -0
  28. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/iter_sync.py +0 -0
  29. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/language.py +0 -0
  30. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/__init__.py +0 -0
  31. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/error.py +0 -0
  32. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/executor.py +0 -0
  33. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/increasable.py +0 -0
  34. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/llm/types.py +0 -0
  35. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/serial/__init__.py +0 -0
  36. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/serial/chunk.py +0 -0
  37. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/serial/segment.py +0 -0
  38. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/serial/splitter.py +0 -0
  39. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/template.py +0 -0
  40. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/utils.py +0 -0
  41. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/__init__.py +0 -0
  42. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/deduplication.py +0 -0
  43. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/__init__.py +0 -0
  44. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/decoder.py +0 -0
  45. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/encoder.py +0 -0
  46. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/parser.py +0 -0
  47. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/tag.py +0 -0
  48. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/firendly/transform.py +0 -0
  49. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml/xml.py +0 -0
  50. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/__init__.py +0 -0
  51. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/const.py +0 -0
  52. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/fill.py +0 -0
  53. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/format.py +0 -0
  54. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/fragmented.py +0 -0
  55. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/group.py +0 -0
  56. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/progressive_locking.py +0 -0
  57. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/submitter.py +0 -0
  58. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/text_segment.py +0 -0
  59. {epub_translator-0.1.0 → epub_translator-0.1.1}/epub_translator/xml_translator/utils.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: epub-translator
- Version: 0.1.0
+ Version: 0.1.1
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
  License: MIT
  Keywords: epub,llm,translation,translator
@@ -1,11 +1,13 @@
  import datetime
  import hashlib
  import json
+ import uuid
  from collections.abc import Callable, Generator
  from importlib.resources import files
  from logging import DEBUG, FileHandler, Formatter, Logger, getLogger
  from os import PathLike
  from pathlib import Path
+ from typing import Self

  from jinja2 import Environment, Template
  from tiktoken import Encoding, get_encoding
@@ -16,6 +18,89 @@ from .increasable import Increasable
  from .types import Message, MessageRole, R


+ class LLMContext:
+   """Context manager for LLM requests with transactional caching."""
+
+   def __init__(
+     self,
+     executor: LLMExecutor,
+     cache_path: Path | None,
+   ) -> None:
+     self._executor = executor
+     self._cache_path = cache_path
+     self._context_id = uuid.uuid4().hex[:12]
+     self._temp_files: list[Path] = []
+
+   def __enter__(self) -> Self:
+     return self
+
+   def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+     if exc_type is None:
+       # Success: commit all temporary cache files
+       self._commit()
+     else:
+       # Failure: rollback (delete) all temporary cache files
+       self._rollback()
+
+   def request(
+     self,
+     input: str | list[Message],
+     parser: Callable[[str], R] = lambda x: x,
+     max_tokens: int | None = None,
+   ) -> R:
+     messages: list[Message]
+     if isinstance(input, str):
+       messages = [Message(role=MessageRole.USER, message=input)]
+     else:
+       messages = input
+
+     cache_key: str | None = None
+     if self._cache_path is not None:
+       cache_key = self._compute_messages_hash(messages)
+       permanent_cache_file = self._cache_path / f"{cache_key}.txt"
+       if permanent_cache_file.exists():
+         cached_content = permanent_cache_file.read_text(encoding="utf-8")
+         return parser(cached_content)
+
+       temp_cache_file = self._cache_path / f"{cache_key}.{self._context_id}.txt"
+       if temp_cache_file.exists():
+         cached_content = temp_cache_file.read_text(encoding="utf-8")
+         return parser(cached_content)
+
+     # Make the actual request
+     response = self._executor.request(
+       messages=messages,
+       parser=lambda x: x,
+       max_tokens=max_tokens,
+     )
+
+     # Save to temporary cache if cache_path is set
+     if self._cache_path is not None and cache_key is not None:
+       temp_cache_file = self._cache_path / f"{cache_key}.{self._context_id}.txt"
+       temp_cache_file.write_text(response, encoding="utf-8")
+       self._temp_files.append(temp_cache_file)
+
+     return parser(response)
+
+   def _compute_messages_hash(self, messages: list[Message]) -> str:
+     messages_dict = [{"role": msg.role.value, "message": msg.message} for msg in messages]
+     messages_json = json.dumps(messages_dict, ensure_ascii=False, sort_keys=True)
+     return hashlib.sha512(messages_json.encode("utf-8")).hexdigest()
+
+   def _commit(self) -> None:
+     for temp_file in self._temp_files:
+       if temp_file.exists():
+         # Remove the .[context-id].txt suffix to get permanent name
+         permanent_name = temp_file.name.rsplit(".", 2)[0] + ".txt"
+         permanent_file = temp_file.parent / permanent_name
+         temp_file.rename(permanent_file)
+
+   def _rollback(self) -> None:
+     for temp_file in self._temp_files:
+       if temp_file.exists():
+         temp_file.unlink()
+
+
  class LLM:
    def __init__(
      self,
@@ -30,7 +115,7 @@ class LLM:
      retry_times: int = 5,
      retry_interval_seconds: float = 6.0,
      log_dir_path: PathLike | None = None,
-   ):
+   ) -> None:
      prompts_path = Path(str(files("epub_translator"))) / "data"
      self._templates: dict[str, Template] = {}
      self._encoding: Encoding = get_encoding(token_encoding)
@@ -68,41 +153,20 @@ class LLM:
    def encoding(self) -> Encoding:
      return self._encoding

+   def context(self) -> LLMContext:
+     return LLMContext(
+       executor=self._executor,
+       cache_path=self._cache_path,
+     )
+
    def request(
      self,
      input: str | list[Message],
      parser: Callable[[str], R] = lambda x: x,
      max_tokens: int | None = None,
    ) -> R:
-     messages: list[Message]
-     if isinstance(input, str):
-       messages = [Message(role=MessageRole.USER, message=input)]
-     else:
-       messages = input
-
-     # Check cache if cache_path is set
-     if self._cache_path is not None:
-       cache_key = self._compute_messages_hash(messages)
-       cache_file = self._cache_path / f"{cache_key}.txt"
-
-       if cache_file.exists():
-         cached_content = cache_file.read_text(encoding="utf-8")
-         return parser(cached_content)
-
-     # Make the actual request
-     response = self._executor.request(
-       messages=messages,
-       parser=lambda x: x,
-       max_tokens=max_tokens,
-     )
-
-     # Save to cache if cache_path is set
-     if self._cache_path is not None:
-       cache_key = self._compute_messages_hash(messages)
-       cache_file = self._cache_path / f"{cache_key}.txt"
-       cache_file.write_text(response, encoding="utf-8")
-
-     return parser(response)
+     with self.context() as ctx:
+       return ctx.request(input=input, parser=parser, max_tokens=max_tokens)

    def template(self, template_name: str) -> Template:
      template = self._templates.get(template_name, None)
@@ -111,17 +175,11 @@ class LLM:
        self._templates[template_name] = template
      return template

-   def _compute_messages_hash(self, messages: list[Message]) -> str:
-     """Compute SHA-512 hash of messages for cache key."""
-     messages_dict = [{"role": msg.role.value, "message": msg.message} for msg in messages]
-     messages_json = json.dumps(messages_dict, ensure_ascii=False, sort_keys=True)
-     return hashlib.sha512(messages_json.encode("utf-8")).hexdigest()
-

    def _create_logger(self) -> Logger | None:
      if self._logger_save_path is None:
        return None
-     now = datetime.datetime.now(datetime.timezone.utc)
+     now = datetime.datetime.now(datetime.UTC)
      timestamp = now.strftime("%Y-%m-%d %H-%M-%S %f")
      file_path = self._logger_save_path / f"request {timestamp}.log"
      logger = getLogger(f"LLM Request {timestamp}")
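
The new `LLMContext` makes response caching transactional: within one context, every response is first written to a temporary `<sha512>.<context-id>.txt` file, and those files are only renamed to their permanent `<sha512>.txt` form when the context exits without an exception. A minimal usage sketch (the `llm` instance and the prompts are assumed here, not part of the diff):

```python
def translate_prompts(llm, prompts: list[str]) -> list[str]:
  results: list[str] = []
  # All requests in this block share one 12-hex-digit context id.
  with llm.context() as ctx:
    for prompt in prompts:
      # Served from the permanent cache, the context's own temp cache,
      # or a fresh executor request, in that order.
      results.append(ctx.request(input=prompt))
  # Leaving the block normally commits (renames) the temp files;
  # an exception rolls them back (deletes them) instead.
  return results
```

`LLM.request` keeps its old signature but now just wraps a single call in a throwaway context, so one-off requests still commit immediately.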
@@ -71,7 +71,7 @@ def translate(
      placeholder.recover()
      deduplicate_ids_in_element(xml.element)
      with zip.replace(chapter_path) as target_file:
-       xml.save(target_file, is_html_like=True)
+       xml.save(target_file)

      # Update progress after each chapter
      processed_chapters += 1
@@ -198,7 +198,10 @@ def _count_chapters(zip: Zip) -> int:
  def _search_chapter_items(zip: Zip):
    for chapter_path in search_spine_paths(zip):
      with zip.read(chapter_path) as chapter_file:
-       xml = XMLLikeNode(chapter_file)
+       xml = XMLLikeNode(
+         file=chapter_file,
+         is_html_like=chapter_path.suffix.lower() in (".html", ".htm"),
+       )
        body_element = find_first(xml.element, "body")
        if body_element is not None:
          placeholder = Placeholder(body_element)
@@ -0,0 +1,231 @@
+ import io
+ import re
+ import warnings
+ from typing import IO
+ from xml.etree.ElementTree import Element, fromstring, tostring
+
+ from .xml import iter_with_stack
+
+ _XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace"
+
+ _COMMON_NAMESPACES = {
+   "http://www.w3.org/1999/xhtml": "xhtml",
+   "http://www.idpf.org/2007/ops": "epub",
+   "http://www.w3.org/1998/Math/MathML": "m",
+   "http://purl.org/dc/elements/1.1/": "dc",
+   "http://www.daisy.org/z3986/2005/ncx/": "ncx",
+   "http://www.idpf.org/2007/opf": "opf",
+   "http://www.w3.org/2000/svg": "svg",
+   "urn:oasis:names:tc:opendocument:xmlns:container": "container",
+   "http://www.w3.org/XML/1998/namespace": "xml",  # Reserved XML namespace
+ }
+
+ _ROOT_NAMESPACES = {
+   "http://www.w3.org/1999/xhtml",  # XHTML
+   "http://www.daisy.org/z3986/2005/ncx/",  # NCX
+   "http://www.idpf.org/2007/opf",  # OPF
+   "urn:oasis:names:tc:opendocument:xmlns:container",  # Container
+ }
+
+ _ENCODING_PATTERN = re.compile(r'encoding\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
+ _FIRST_ELEMENT_PATTERN = re.compile(r"<(?![?!])[a-zA-Z]")
+ _NAMESPACE_IN_TAG = re.compile(r"\{([^}]+)\}")
+
+ # Some non-standard EPUB generators use HTML-style tags without self-closing syntax.
+ # We need to convert them to XML-compatible format before parsing.
+ _EMPTY_TAGS = (
+   "br",
+   "hr",
+   "input",
+   "col",
+   "base",
+   "meta",
+   "area",
+ )
+
+ # For reading: match tags like <br> or <br class="x"> (but not <br/> or <body>)
+ _EMPTY_TAG_OPEN_PATTERN = re.compile(r"<(" + "|".join(_EMPTY_TAGS) + r")(\s[^/>]*)>")
+
+ # For saving: match self-closing tags like <br />
+ _EMPTY_TAG_CLOSE_PATTERN = re.compile(r"<(" + "|".join(_EMPTY_TAGS) + r")(\s[^>]*?)\s*/>")
+
+
+ class XMLLikeNode:
+   def __init__(self, file: IO[bytes], is_html_like: bool = False) -> None:
+     raw_content = file.read()
+     self._encoding: str = self._detect_encoding(raw_content)
+     content = raw_content.decode(self._encoding)
+     self._header, xml_content = self._extract_header(content)
+     self._namespaces: dict[str, str] = {}
+     self._tag_to_namespace: dict[str, str] = {}
+     self._attr_to_namespace: dict[str, str] = {}
+
+     # For non-standard HTML files, convert <br> to <br/> before parsing
+     self._is_html_like = is_html_like
+     if is_html_like:
+       xml_content = re.sub(
+         pattern=_EMPTY_TAG_OPEN_PATTERN,
+         repl=lambda m: f"<{m.group(1)}{m.group(2)} />",
+         string=xml_content,
+       )
+
+     try:
+       self.element = self._extract_and_clean_namespaces(
+         element=fromstring(xml_content),
+       )
+     except Exception as error:
+       raise ValueError("Failed to parse XML-like content") from error
+
+   @property
+   def encoding(self) -> str:
+     return self._encoding
+
+   @property
+   def namespaces(self) -> list[str]:
+     return list(self._namespaces.keys())
+
+   def save(self, file: IO[bytes]) -> None:
+     writer = io.TextIOWrapper(file, encoding=self._encoding, write_through=True)
+     try:
+       if self._header:
+         writer.write(self._header)
+
+       content = self._serialize_with_namespaces(self.element)
+
+       # For non-standard HTML files, convert back from <br/> to <br>
+       if self._is_html_like:
+         content = re.sub(
+           pattern=_EMPTY_TAG_CLOSE_PATTERN,
+           repl=lambda m: f"<{m.group(1)}{m.group(2)}>",
+           string=content,
+         )
+
+       writer.write(content)
+
+     finally:
+       writer.detach()
+
+   def _detect_encoding(self, raw_content: bytes) -> str:
+     if raw_content.startswith(b"\xef\xbb\xbf"):
+       return "utf-8-sig"
+     elif raw_content.startswith(b"\xff\xfe"):
+       return "utf-16-le"
+     elif raw_content.startswith(b"\xfe\xff"):
+       return "utf-16-be"
+
+     # Try to extract the encoding from the XML declaration: read only the first 1024 bytes to find it
+     header_bytes = raw_content[:1024]
+     for try_encoding in ("utf-8", "utf-16-le", "utf-16-be", "iso-8859-1"):
+       try:
+         header_str = header_bytes.decode(try_encoding)
+         match = _ENCODING_PATTERN.search(header_str)
+         if match:
+           declared_encoding = match.group(1).lower()
+           try:
+             raw_content.decode(declared_encoding)
+             return declared_encoding
+           except (LookupError, UnicodeDecodeError):
+             pass
+       except UnicodeDecodeError:
+         continue
+
+     try:
+       raw_content.decode("utf-8")
+       return "utf-8"
+     except UnicodeDecodeError:
+       pass
+     return "iso-8859-1"
+
+   def _extract_header(self, content: str) -> tuple[str, str]:
+     match = _FIRST_ELEMENT_PATTERN.search(content)
+     if match:
+       split_pos = match.start()
+       header = content[:split_pos]
+       xml_content = content[split_pos:]
+       return header, xml_content
+     return "", content
+
+   def _extract_and_clean_namespaces(self, element: Element) -> Element:
+     for _, elem in iter_with_stack(element):
+       match = _NAMESPACE_IN_TAG.match(elem.tag)
+       if match:
+         namespace_uri = match.group(1)
+         if namespace_uri not in self._namespaces:
+           prefix = _COMMON_NAMESPACES.get(namespace_uri, f"ns{len(self._namespaces)}")
+           self._namespaces[namespace_uri] = prefix
+
+         tag_name = elem.tag[len(match.group(0)) :]
+
+         # Record tag -> namespace mapping (warn if conflict)
+         if tag_name in self._tag_to_namespace and self._tag_to_namespace[tag_name] != namespace_uri:
+           warnings.warn(
+             f"Tag '{tag_name}' has multiple namespaces: "
+             f"{self._tag_to_namespace[tag_name]} and {namespace_uri}. "
+             f"Using the first one.",
+             stacklevel=2,
+           )
+         else:
+           self._tag_to_namespace[tag_name] = namespace_uri
+
+         # Clean: remove namespace URI completely
+         elem.tag = tag_name
+
+       for attr_key in list(elem.attrib.keys()):
+         match = _NAMESPACE_IN_TAG.match(attr_key)
+         if match:
+           namespace_uri = match.group(1)
+           if namespace_uri not in self._namespaces:
+             prefix = _COMMON_NAMESPACES.get(namespace_uri, f"ns{len(self._namespaces)}")
+             self._namespaces[namespace_uri] = prefix
+
+           attr_name = attr_key[len(match.group(0)) :]
+           attr_value = elem.attrib.pop(attr_key)
+
+           # Record attr -> namespace mapping (warn if conflict)
+           if attr_name in self._attr_to_namespace and self._attr_to_namespace[attr_name] != namespace_uri:
+             warnings.warn(
+               f"Attribute '{attr_name}' has multiple namespaces: "
+               f"{self._attr_to_namespace[attr_name]} and {namespace_uri}. "
+               f"Using the first one.",
+               stacklevel=2,
+             )
+           else:
+             self._attr_to_namespace[attr_name] = namespace_uri
+
+           # Clean: remove namespace URI completely
+           elem.attrib[attr_name] = attr_value
+     return element
+
+   def _serialize_with_namespaces(self, element: Element) -> str:
+     # First, add namespace declarations to root element (before serialization)
+     for namespace_uri, prefix in self._namespaces.items():
+       # Skip the reserved xml namespace - it's implicit
+       if namespace_uri == _XML_NAMESPACE_URI:
+         continue
+       if namespace_uri in _ROOT_NAMESPACES:
+         element.attrib["xmlns"] = namespace_uri
+       else:
+         element.attrib[f"xmlns:{prefix}"] = namespace_uri
+
+     # Serialize the element tree as-is (tags are simple names without prefixes)
+     xml_string = tostring(element, encoding="unicode")
+
+     # Now restore namespace prefixes in the serialized string.
+     # For each tag that should have a namespace prefix, wrap it with the prefix
+     for tag_name, namespace_uri in self._tag_to_namespace.items():
+       if namespace_uri not in _ROOT_NAMESPACES:
+         # Get the prefix for this namespace
+         prefix = self._namespaces[namespace_uri]
+         # Replace opening and closing tags
+         xml_string = xml_string.replace(f"<{tag_name} ", f"<{prefix}:{tag_name} ")
+         xml_string = xml_string.replace(f"<{tag_name}>", f"<{prefix}:{tag_name}>")
+         xml_string = xml_string.replace(f"</{tag_name}>", f"</{prefix}:{tag_name}>")
+         xml_string = xml_string.replace(f"<{tag_name}/>", f"<{prefix}:{tag_name}/>")
+
+     # Similarly for attributes (though less common in EPUB)
+     for attr_name, namespace_uri in self._attr_to_namespace.items():
+       if namespace_uri not in _ROOT_NAMESPACES:
+         prefix = self._namespaces[namespace_uri]
+         xml_string = xml_string.replace(f' {attr_name}="', f' {prefix}:{attr_name}="')
+
+     return xml_string
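
The rewritten `XMLLikeNode` now owns the namespace bookkeeping: parsing strips `{uri}` prefixes from tags and attributes into the `_tag_to_namespace` / `_attr_to_namespace` maps, and `save` re-declares the namespaces and restores prefixes textually. A round-trip sketch (the file names and the `<title>` edit are illustrative only, not from the diff):

```python
from epub_translator.xml.xml_like import XMLLikeNode

with open("chapter.xhtml", "rb") as source:
  node = XMLLikeNode(file=source, is_html_like=False)

# Tags are plain names here ("title", not "{http://www.w3.org/1999/xhtml}title")
# because the namespaces were extracted during parsing.
title = node.element.find("head/title")
if title is not None:
  title.text = "Translated Title"

with open("chapter_out.xhtml", "wb") as target:
  node.save(target)  # re-adds xmlns declarations and namespace prefixes
```

Note the trade-off: prefixes come back via plain string replacement over the serialized output, which assumes each tag or attribute name maps to a single namespace (hence the `warnings.warn` on conflicts).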
@@ -126,53 +126,54 @@ class XMLTranslator:
      conversation_history: list[Message] = []
      latest_error: ValidationError | None = None

-     for _ in range(self._max_retries):
-       # Request LLM response
-       response = self._llm.request(
-         input=fixed_messages + conversation_history,
-       )
-
-       try:
-         # Extract XML from response
-         validated_element = _extract_xml_element(response)
-
-         # Validate with progressive locking
-         is_complete, error_message, newly_locked = validator.validate_with_locking(
-           template_ele=fill.request_element,
-           validated_ele=validated_element,
-           errors_limit=self._max_fill_displaying_errors,
+     with self._llm.context() as llm_context:
+       for _ in range(self._max_retries):
+         # Request LLM response
+         response = llm_context.request(
+           input=fixed_messages + conversation_history,
          )

-         if is_complete:
-           # All nodes locked, fill successful
-           fill._fill_submitted_texts(  # pylint: disable=protected-access
-             generated_ids_stack=[],
-             element=validated_element,
+         try:
+           # Extract XML from response
+           validated_element = _extract_xml_element(response)
+
+           # Validate with progressive locking
+           is_complete, error_message, newly_locked = validator.validate_with_locking(
+             template_ele=fill.request_element,
+             validated_ele=validated_element,
+             errors_limit=self._max_fill_displaying_errors,
            )
-           return validated_element
-
-         # Not complete yet, construct error message with progress info
-         progress_msg = f"Progress: {len(validator.locked_ids)} nodes locked"
-         if newly_locked:
-           progress_msg += f", {len(newly_locked)} newly locked this round"
-
-         full_error_message = f"{progress_msg}\n\n{error_message}"
-
-         conversation_history = [
-           Message(role=MessageRole.ASSISTANT, message=response),
-           Message(role=MessageRole.USER, message=full_error_message),
-         ]
-
-       except ValidationError as error:
-         # XML extraction or basic validation failed
-         latest_error = error
-         conversation_history = [
-           Message(role=MessageRole.ASSISTANT, message=response),
-           Message(role=MessageRole.USER, message=str(error)),
-         ]
-
-     message = f"Failed to get valid XML structure after {self._max_retries} attempts"
-     if latest_error is None:
-       raise ValueError(message)
-     else:
-       raise ValueError(message) from latest_error
+
+           if is_complete:
+             # All nodes locked, fill successful
+             fill._fill_submitted_texts(  # pylint: disable=protected-access
+               generated_ids_stack=[],
+               element=validated_element,
+             )
+             return validated_element
+
+           # Not complete yet, construct error message with progress info
+           progress_msg = f"Progress: {len(validator.locked_ids)} nodes locked"
+           if newly_locked:
+             progress_msg += f", {len(newly_locked)} newly locked this round"
+
+           full_error_message = f"{progress_msg}\n\n{error_message}"
+
+           conversation_history = [
+             Message(role=MessageRole.ASSISTANT, message=response),
+             Message(role=MessageRole.USER, message=full_error_message),
+           ]
+
+         except ValidationError as error:
+           # XML extraction or basic validation failed
+           latest_error = error
+           conversation_history = [
+             Message(role=MessageRole.ASSISTANT, message=response),
+             Message(role=MessageRole.USER, message=str(error)),
+           ]
+
+       message = f"Failed to get valid XML structure after {self._max_retries} attempts"
+       if latest_error is None:
+         raise ValueError(message)
+       else:
+         raise ValueError(message) from latest_error
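
Wrapping the whole retry loop in one `llm.context()` ties the cache lifetime to the fill attempt: every intermediate response is cached under the same context id, committed together once a valid XML structure is returned, and rolled back when the loop exhausts its retries and raises. A condensed sketch of the pattern (with `validate` standing in for the progressive-locking validator):

```python
def request_until_valid(llm, messages, validate, max_retries: int = 5):
  with llm.context() as ctx:  # one context id for the whole retry loop
    for _ in range(max_retries):
      response = ctx.request(input=messages)
      if validate(response):
        return response  # normal exit commits all cached responses
    raise ValueError("no valid response")  # exception rolls the cache back
```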
@@ -1,6 +1,6 @@
  [project]
  name = "epub-translator"
- version = "0.1.0"
+ version = "0.1.1"
  description = "Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text."
  keywords = ["epub", "llm", "translation", "translator"]
  authors = [
@@ -1,176 +0,0 @@
- import io
- import re
- from typing import IO
- from xml.etree.ElementTree import Element, fromstring, tostring
-
- from .xml import iter_with_stack
-
- _COMMON_NAMESPACES = {
-   "http://www.w3.org/1999/xhtml": "xhtml",
-   "http://www.idpf.org/2007/ops": "epub",
-   "http://www.w3.org/1998/Math/MathML": "m",
-   "http://purl.org/dc/elements/1.1/": "dc",
-   "http://www.daisy.org/z3986/2005/ncx/": "ncx",
-   "http://www.idpf.org/2007/opf": "opf",
-   "http://www.w3.org/2000/svg": "svg",
-   "urn:oasis:names:tc:opendocument:xmlns:container": "container",
- }
-
- _ROOT_NAMESPACES = {
-   "http://www.w3.org/1999/xhtml",  # XHTML
-   "http://www.daisy.org/z3986/2005/ncx/",  # NCX
-   "http://www.idpf.org/2007/opf",  # OPF
-   "urn:oasis:names:tc:opendocument:xmlns:container",  # Container
- }
-
- _ENCODING_PATTERN = re.compile(r'encoding\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
- _FIRST_ELEMENT_PATTERN = re.compile(r"<(?![?!])[a-zA-Z]")
- _NAMESPACE_IN_TAG = re.compile(r"\{([^}]+)\}")
-
- # HTML defines a set of self-closing tags; they must be converted to non-self-closing form, because the EPUB format does not support them
- # https://www.tutorialspoint.com/which-html-tags-are-self-closing
- _EMPTY_TAGS = (
-   "br",
-   "hr",
-   "input",
-   "col",
-   "base",
-   "meta",
-   "area",
- )
-
- _EMPTY_TAG_PATTERN = re.compile(r"<(" + "|".join(_EMPTY_TAGS) + r")(\s[^>]*?)\s*/?>")
-
-
- class XMLLikeNode:
-   def __init__(self, file: IO[bytes]) -> None:
-     raw_content = file.read()
-     self._encoding: str = _detect_encoding(raw_content)
-     content = raw_content.decode(self._encoding)
-     self._header, xml_content = _extract_header(content)
-     try:
-       self.element = fromstring(xml_content)
-     except Exception as error:
-       raise ValueError("Failed to parse XML-like content") from error
-     self._namespaces: dict[str, str] = _extract_and_clean_namespaces(self.element)
-
-   @property
-   def encoding(self) -> str:
-     return self._encoding
-
-   @property
-   def namespaces(self) -> list[str]:
-     return list(self._namespaces.keys())
-
-   def save(self, file: IO[bytes], is_html_like: bool = False) -> None:
-     writer = io.TextIOWrapper(file, encoding=self._encoding, write_through=True)
-     try:
-       if self._header:
-         writer.write(self._header)
-
-       content = _serialize_with_namespaces(element=self.element, namespaces=self._namespaces)
-       if is_html_like:
-         content = re.sub(
-           pattern=_EMPTY_TAG_PATTERN,
-           repl=lambda m: f"<{m.group(1)}{m.group(2)}>",
-           string=content,
-         )
-       else:
-         content = re.sub(
-           pattern=_EMPTY_TAG_PATTERN,
-           repl=lambda m: f"<{m.group(1)}{m.group(2)} />",
-           string=content,
-         )
-       writer.write(content)
-
-     finally:
-       writer.detach()
-
-
- def _detect_encoding(raw_content: bytes) -> str:
-   if raw_content.startswith(b"\xef\xbb\xbf"):
-     return "utf-8-sig"
-   elif raw_content.startswith(b"\xff\xfe"):
-     return "utf-16-le"
-   elif raw_content.startswith(b"\xfe\xff"):
-     return "utf-16-be"
-
-   # Try to extract the encoding from the XML declaration: read only the first 1024 bytes to find it
-   header_bytes = raw_content[:1024]
-   for try_encoding in ("utf-8", "utf-16-le", "utf-16-be", "iso-8859-1"):
-     try:
-       header_str = header_bytes.decode(try_encoding)
-       match = _ENCODING_PATTERN.search(header_str)
-       if match:
-         declared_encoding = match.group(1).lower()
-         try:
-           raw_content.decode(declared_encoding)
-           return declared_encoding
-         except (LookupError, UnicodeDecodeError):
-           pass
-     except UnicodeDecodeError:
-       continue
-
-   try:
-     raw_content.decode("utf-8")
-     return "utf-8"
-   except UnicodeDecodeError:
-     pass
-   return "iso-8859-1"
-
-
- def _extract_header(content: str) -> tuple[str, str]:
-   match = _FIRST_ELEMENT_PATTERN.search(content)
-   if match:
-     split_pos = match.start()
-     header = content[:split_pos]
-     xml_content = content[split_pos:]
-     return header, xml_content
-   return "", content
-
-
- def _extract_and_clean_namespaces(element: Element):
-   namespaces: dict[str, str] = {}
-   for _, elem in iter_with_stack(element):
-     match = _NAMESPACE_IN_TAG.match(elem.tag)
-     if match:
-       namespace_uri = match.group(1)
-       if namespace_uri not in namespaces:
-         prefix = _COMMON_NAMESPACES.get(namespace_uri, f"ns{len(namespaces)}")
-         namespaces[namespace_uri] = prefix
-
-       tag_name = elem.tag[len(match.group(0)) :]
-       elem.tag = tag_name
-
-     for attr_key in list(elem.attrib.keys()):
-       match = _NAMESPACE_IN_TAG.match(attr_key)
-       if match:
-         namespace_uri = match.group(1)
-         if namespace_uri not in namespaces:
-           prefix = _COMMON_NAMESPACES.get(namespace_uri, f"ns{len(namespaces)}")
-           namespaces[namespace_uri] = prefix
-
-         attr_name = attr_key[len(match.group(0)) :]
-         attr_value = elem.attrib.pop(attr_key)
-         elem.attrib[attr_name] = attr_value
-   return namespaces
-
-
- def _serialize_with_namespaces(
-   element: Element,
-   namespaces: dict[str, str],
- ) -> str:
-   for namespace_uri, prefix in namespaces.items():
-     if namespace_uri in _ROOT_NAMESPACES:
-       element.attrib["xmlns"] = namespace_uri
-     else:
-       element.attrib[f"xmlns:{prefix}"] = namespace_uri
-   xml_string = tostring(element, encoding="unicode")
-   for namespace_uri, prefix in namespaces.items():
-     if namespace_uri in _ROOT_NAMESPACES:
-       xml_string = xml_string.replace(f"{{{namespace_uri}}}", "")
-     else:
-       xml_string = xml_string.replace(f"{{{namespace_uri}}}", f"{prefix}:")
-       pattern = r'\s+xmlns:(ns\d+)="' + re.escape(namespace_uri) + r'"'
-       xml_string = re.sub(pattern, "", xml_string)
-   return xml_string