PyPI - office2pdf-python - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

office2pdf-python 0.1.0.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/Cargo.lock RENAMED Viewed

@@ -1932,7 +1932,7 @@ dependencies = [
 [[package]]
 name = "office2pdf-python"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
  "office2pdf",
  "pyo3",

{office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "office2pdf-python"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"
 license = "Apache-2.0"
 description = "PyO3 bindings for office2pdf"

{office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: office2pdf-python
-Version: 0.1.0
+Version: 0.2.0
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
@@ -67,30 +67,93 @@ The CLI accepts DOCX, PPTX, and XLSX input paths and writes the converted PDF by
 ### `Format`
-Enum values: `Format.DOCX`, `Format.PPTX`, and `Format.XLSX`.
+`Format` identifies the input Office format for byte-based conversion:
+- `Format.DOCX` (`"docx"`)
+- `Format.PPTX` (`"pptx"`)
+- `Format.XLSX` (`"xlsx"`)
+### `PdfStandard`
+`PdfStandard` currently supports `pdf/a-2b`:
+- canonical: `PdfStandard.PDF_A_2B`
+- compatibility alias: `PdfStandard.PDF_A_2_B`
+`PdfStandard.from_value()` accepts both forms and related normalizations (`"pdf/a-2b"`, `"pdfa2b"`).
+### `PaperSize`
+- `PaperSize.A4`
+- `PaperSize.LETTER`
+- `PaperSize.LEGAL`
+### `CustomPaperSize`
+`CustomPaperSize(width: float, height: float)` stores explicit PDF point dimensions, matching upstream `PaperSize::Custom` (`1 point = 1/72 inch`).
 ### `ConvertOptions`
-Dataclass fields:
+All options are stored in a Python dataclass and translated to native options via `to_native()`.
-- `page_range: str | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
-- `sheet_filter: Sequence[str] | None` — maps to upstream `sheet_names` for XLSX conversion.
-- `slide_range: str | None` — accepts upstream strings like `"1-5"` or `"3"`.
-- `paper_size: str | None` — accepts upstream `"a4"`, `"letter"`, or `"legal"`.
-- `landscape: bool | None` — maps to upstream orientation control.
-- `font_paths: Sequence[str | pathlib.Path]` — additional font directories.
-- `pdf_standard: str | None` — currently supports `"pdf/a-2b"`.
-- `include_warnings: bool` — controls whether returned warnings are included in `ConversionResult`.
-- `memory_limit_mb: int | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
-- `streaming: bool` — enables upstream streaming mode for supported formats.
+- `sheet_names: Sequence[str] | None`
+- `sheet_filter: Sequence[str] | None`
-### `ConversionResult`
+  These are aliases for XLSX sheet selection. If both are provided, they must be equal.
+- `slide_range: SlideRange | str | None`
+  A `SlideRange` can be parsed from a string such as `"1-5"` or constructed explicitly as `SlideRange(1, 5)`. Values are normalized to `start-end` strings for native conversion.
+- `pdf_standard: PdfStandard | str | None`
+  Only `pdf/a-2b` is supported in this version.
+- `paper_size: PaperSize | CustomPaperSize | str | None`
+  String values normalize to named page sizes.
-Dataclass fields:
+- `font_paths: Sequence[str | pathlib.Path]`
+- `landscape: bool | None`
+- `tagged: bool | None`
+- `pdf_ua: bool | None`
+- `streaming: bool`
+- `streaming_chunk_size: int | None`
+- `include_warnings: bool`
+The following options are kept as fields for API compatibility but are rejected when set, because upstream `office2pdf 0.6.0` does not expose a native field for them:
+- `page_range: str | None`
+- `memory_limit_mb: int | None`
+### `ConversionResult`
 - `pdf: bytes`
-- `warnings: tuple[str, ...]`
-- `metrics: Mapping[str, Any] | None`
+- `warnings: tuple[ConvertWarning, ...]`
+- `metrics: ConvertMetrics | None`
+- `warning_messages: tuple[str, ...]` property collecting warning messages.
+### Warning types
+Warning payloads from the native layer are mapped to typed subclasses of `ConvertWarning`:
+- `UnsupportedElementWarning(format, element)`
+- `PartialElementWarning(format, element, detail)`
+- `FallbackUsedWarning(format, from_, to)`
+- `ParseSkippedWarning(format, reason)`
+- and a base `ConvertWarning` for legacy/unknown forms.
+### `ConvertMetrics`
+- `parse_duration`
+- `codegen_duration`
+- `compile_duration`
+- `total_duration`
+- `input_size_bytes`
+- `output_size_bytes`
+- `page_count`
+Duration fields are reported in seconds.
 ### Functions
@@ -100,7 +163,27 @@ convert_path(path: str | pathlib.Path, options: ConvertOptions | None = None) ->
 infer_format(path: str | pathlib.Path) -> Format
 ```
-`convert_path()` validates the file extension before calling the native extension. `convert_bytes()` requires an explicit format.
+- `infer_format()` reads the file suffix and accepts only `.docx`, `.pptx`, or `.xlsx`.
+- `convert_path()` validates the file extension before conversion.
+- `convert_bytes()` requires an explicit input `format`.
+## Exceptions
+Re-exported exception hierarchy:
+- `Office2PdfError`
+- `UnsupportedFormatError`
+- `Office2PdfIoError`
+- `Office2PdfParseError`
+- `Office2PdfRenderError`
+- `UnsupportedEncryptionError`
+- `UnsupportedOptionError`
+## API scope
+Version `0.2.0` exposes the upstream `office2pdf 0.6.0` conversion API: file/bytes conversion, conversion options, structured warnings, metrics, and typed errors.
+The upstream `pdf_ops` feature (`page_count`, `merge`, `split`), internal IR/parser/render modules, TypeScript helpers, and WASM APIs are intentionally out of scope for this Python release.
 ## Local development
@@ -148,8 +231,8 @@ Create a matching GitHub environment named `pypi` and require manual approval fo
 To publish a release automatically, update the version in `pyproject.toml` and `Cargo.toml`, commit the change, then push a matching tag:
 ```bash
-git tag v0.1.0
-git push origin v0.1.0
+git tag v0.2.0
+git push origin v0.2.0
 ```
 The tag push starts `.github/workflows/release.yml`, builds artifacts, publishes to PyPI after the `pypi` environment approval, and creates a GitHub Release for tag-triggered runs.

{office2pdf_python-0.1.0 → office2pdf_python-0.2.0}/README.md RENAMED Viewed

@@ -42,30 +42,93 @@ The CLI accepts DOCX, PPTX, and XLSX input paths and writes the converted PDF by
 ### `Format`
-Enum values: `Format.DOCX`, `Format.PPTX`, and `Format.XLSX`.
+`Format` identifies the input Office format for byte-based conversion:
+- `Format.DOCX` (`"docx"`)
+- `Format.PPTX` (`"pptx"`)
+- `Format.XLSX` (`"xlsx"`)
+### `PdfStandard`
+`PdfStandard` currently supports `pdf/a-2b`:
+- canonical: `PdfStandard.PDF_A_2B`
+- compatibility alias: `PdfStandard.PDF_A_2_B`
+`PdfStandard.from_value()` accepts both forms and related normalizations (`"pdf/a-2b"`, `"pdfa2b"`).
+### `PaperSize`
+- `PaperSize.A4`
+- `PaperSize.LETTER`
+- `PaperSize.LEGAL`
+### `CustomPaperSize`
+`CustomPaperSize(width: float, height: float)` stores explicit PDF point dimensions, matching upstream `PaperSize::Custom` (`1 point = 1/72 inch`).
 ### `ConvertOptions`
-Dataclass fields:
+All options are stored in a Python dataclass and translated to native options via `to_native()`.
-- `page_range: str | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
-- `sheet_filter: Sequence[str] | None` — maps to upstream `sheet_names` for XLSX conversion.
-- `slide_range: str | None` — accepts upstream strings like `"1-5"` or `"3"`.
-- `paper_size: str | None` — accepts upstream `"a4"`, `"letter"`, or `"legal"`.
-- `landscape: bool | None` — maps to upstream orientation control.
-- `font_paths: Sequence[str | pathlib.Path]` — additional font directories.
-- `pdf_standard: str | None` — currently supports `"pdf/a-2b"`.
-- `include_warnings: bool` — controls whether returned warnings are included in `ConversionResult`.
-- `memory_limit_mb: int | None` — rejected when set because `office2pdf 0.6.0` does not expose a native field for it.
-- `streaming: bool` — enables upstream streaming mode for supported formats.
+- `sheet_names: Sequence[str] | None`
+- `sheet_filter: Sequence[str] | None`
-### `ConversionResult`
+  These are aliases for XLSX sheet selection. If both are provided, they must be equal.
+- `slide_range: SlideRange | str | None`
+  A `SlideRange` can be parsed from a string such as `"1-5"` or constructed explicitly as `SlideRange(1, 5)`. Values are normalized to `start-end` strings for native conversion.
+- `pdf_standard: PdfStandard | str | None`
+  Only `pdf/a-2b` is supported in this version.
+- `paper_size: PaperSize | CustomPaperSize | str | None`
+  String values normalize to named page sizes.
-Dataclass fields:
+- `font_paths: Sequence[str | pathlib.Path]`
+- `landscape: bool | None`
+- `tagged: bool | None`
+- `pdf_ua: bool | None`
+- `streaming: bool`
+- `streaming_chunk_size: int | None`
+- `include_warnings: bool`
+The following options are kept as fields for API compatibility but are rejected when set, because upstream `office2pdf 0.6.0` does not expose a native field for them:
+- `page_range: str | None`
+- `memory_limit_mb: int | None`
+### `ConversionResult`
 - `pdf: bytes`
-- `warnings: tuple[str, ...]`
-- `metrics: Mapping[str, Any] | None`
+- `warnings: tuple[ConvertWarning, ...]`
+- `metrics: ConvertMetrics | None`
+- `warning_messages: tuple[str, ...]` property collecting warning messages.
+### Warning types
+Warning payloads from the native layer are mapped to typed subclasses of `ConvertWarning`:
+- `UnsupportedElementWarning(format, element)`
+- `PartialElementWarning(format, element, detail)`
+- `FallbackUsedWarning(format, from_, to)`
+- `ParseSkippedWarning(format, reason)`
+- and a base `ConvertWarning` for legacy/unknown forms.
+### `ConvertMetrics`
+- `parse_duration`
+- `codegen_duration`
+- `compile_duration`
+- `total_duration`
+- `input_size_bytes`
+- `output_size_bytes`
+- `page_count`
+Duration fields are reported in seconds.
 ### Functions
@@ -75,7 +138,27 @@ convert_path(path: str | pathlib.Path, options: ConvertOptions | None = None) ->
 infer_format(path: str | pathlib.Path) -> Format
 ```
-`convert_path()` validates the file extension before calling the native extension. `convert_bytes()` requires an explicit format.
+- `infer_format()` reads the file suffix and accepts only `.docx`, `.pptx`, or `.xlsx`.
+- `convert_path()` validates the file extension before conversion.
+- `convert_bytes()` requires an explicit input `format`.
+## Exceptions
+Re-exported exception hierarchy:
+- `Office2PdfError`
+- `UnsupportedFormatError`
+- `Office2PdfIoError`
+- `Office2PdfParseError`
+- `Office2PdfRenderError`
+- `UnsupportedEncryptionError`
+- `UnsupportedOptionError`
+## API scope
+Version `0.2.0` exposes the upstream `office2pdf 0.6.0` conversion API: file/bytes conversion, conversion options, structured warnings, metrics, and typed errors.
+The upstream `pdf_ops` feature (`page_count`, `merge`, `split`), internal IR/parser/render modules, TypeScript helpers, and WASM APIs are intentionally out of scope for this Python release.
 ## Local development
@@ -123,8 +206,8 @@ Create a matching GitHub environment named `pypi` and require manual approval fo
 To publish a release automatically, update the version in `pyproject.toml` and `Cargo.toml`, commit the change, then push a matching tag:
 ```bash
-git tag v0.1.0
-git push origin v0.1.0
+git tag v0.2.0
+git push origin v0.2.0
 ```
 The tag push starts `.github/workflows/release.yml`, builds artifacts, publishes to PyPI after the `pypi` environment approval, and creates a GitHub Release for tag-triggered runs.

office2pdf_python-0.2.0/office2pdf/__init__.py ADDED Viewed

@@ -0,0 +1,74 @@
+from __future__ import annotations
+from importlib import metadata
+from ._native_bridge import (
+    _metrics_from_native,
+    _native_module,
+    _native_options,
+    _result_from_native,
+    _warning_from_native,
+    convert_bytes,
+    convert_path,
+    infer_format,
+)
+from .exceptions import (
+    Office2PdfError,
+    Office2PdfIoError,
+    Office2PdfParseError,
+    Office2PdfRenderError,
+    UnsupportedEncryptionError,
+    UnsupportedFormatError,
+    UnsupportedOptionError,
+)
+from .models import (
+    ConversionMetrics,
+    ConversionResult,
+    ConvertMetrics,
+    ConvertWarning,
+    CustomPaperSize,
+    FallbackUsedWarning,
+    Format,
+    ParseSkippedWarning,
+    PaperSize,
+    PartialElementWarning,
+    PdfStandard,
+    SlideRange,
+    UnsupportedElementWarning,
+)
+from .options import ConvertOptions
+try:
+    __version__ = metadata.version("office2pdf-python")
+except metadata.PackageNotFoundError:
+    __version__ = "0.2.0"
+__all__ = [
+    "ConversionResult",
+    "ConversionMetrics",
+    "ConvertMetrics",
+    "ConvertOptions",
+    "Format",
+    "PaperSize",
+    "CustomPaperSize",
+    "PdfStandard",
+    "SlideRange",
+    "ConvertWarning",
+    "UnsupportedElementWarning",
+    "PartialElementWarning",
+    "FallbackUsedWarning",
+    "ParseSkippedWarning",
+    "Office2PdfError",
+    "UnsupportedFormatError",
+    "Office2PdfIoError",
+    "Office2PdfParseError",
+    "Office2PdfRenderError",
+    "UnsupportedEncryptionError",
+    "UnsupportedOptionError",
+    "__version__",
+    "convert_bytes",
+    "convert_path",
+    "infer_format",
+]

office2pdf_python-0.2.0/office2pdf/_native.pyi ADDED Viewed

@@ -0,0 +1,67 @@
+from __future__ import annotations
+from typing import Mapping, Sequence, TypedDict
+class Office2PdfError(Exception): ...
+class UnsupportedFormatError(Office2PdfError): ...
+class Office2PdfIoError(Office2PdfError): ...
+class Office2PdfParseError(Office2PdfError): ...
+class Office2PdfRenderError(Office2PdfError): ...
+class UnsupportedEncryptionError(Office2PdfError): ...
+class UnsupportedOptionError(Office2PdfError): ...
+__version__: str
+ConvertWarningData = TypedDict(
+    "ConvertWarningData",
+    {
+        "kind": str,
+        "format": str,
+        "element": str,
+        "detail": str,
+        "from": str,
+        "to": str,
+        "reason": str,
+        "message": str,
+    },
+    total=False,
+)
+class ConvertMetricsData(TypedDict):
+    parse_duration: float
+    codegen_duration: float
+    compile_duration: float
+    total_duration: float
+    input_size_bytes: int
+    output_size_bytes: int
+    page_count: int
+class ConvertResultData(TypedDict, total=False):
+    pdf: bytes
+    warnings: Sequence[str | ConvertWarningData]
+    metrics: ConvertMetricsData | None
+def convert_bytes(
+    data: bytes,
+    format: str,
+    options: Mapping[str, object] | None = None,
+) -> ConvertResultData: ...
+def convert_path(
+    path: str,
+    options: Mapping[str, object] | None = None,
+) -> ConvertResultData: ...

office2pdf_python-0.2.0/office2pdf/_native_bridge.py ADDED Viewed

@@ -0,0 +1,179 @@
+from __future__ import annotations
+from collections.abc import Mapping
+from importlib import import_module
+from pathlib import Path
+from typing import Protocol, runtime_checkable
+from .exceptions import (
+    Office2PdfError,
+    Office2PdfIoError,
+    Office2PdfParseError,
+    Office2PdfRenderError,
+    UnsupportedEncryptionError,
+    UnsupportedFormatError,
+    UnsupportedOptionError,
+)
+from ._results import _metrics_from_native, _result_from_native, _warning_from_native
+from .models import ConversionResult, Format
+from .options import ConvertOptions
+@runtime_checkable
+class _NativeModule(Protocol):
+    def convert_bytes(
+        self,
+        data: bytes,
+        format: str,
+        options: dict[str, object] | None = None,
+    ) -> Mapping[str, object]:
+        ...
+    def convert_path(
+        self,
+        path: str,
+        options: dict[str, object] | None = None,
+    ) -> Mapping[str, object]:
+        ...
+@runtime_checkable
+class _NativeBytesModule(Protocol):
+    def convert_bytes(
+        self,
+        data: bytes,
+        format: str,
+        options: dict[str, object] | None = None,
+    ) -> Mapping[str, object]:
+        ...
+@runtime_checkable
+class _NativePathModule(Protocol):
+    def convert_path(
+        self,
+        path: str,
+        options: dict[str, object] | None = None,
+    ) -> Mapping[str, object]:
+        ...
+def _native_module() -> _NativeModule:
+    module = import_module("office2pdf._native")
+    if not isinstance(module, _NativeModule):
+        raise TypeError("office2pdf._native does not provide the expected conversion functions")
+    return module
+def _native_error_types() -> tuple[type[BaseException], ...]:
+    module = import_module("office2pdf._native")
+    names = (
+        "Office2PdfError",
+        "UnsupportedFormatError",
+        "Office2PdfIoError",
+        "Office2PdfParseError",
+        "Office2PdfRenderError",
+        "UnsupportedEncryptionError",
+        "UnsupportedOptionError",
+    )
+    errors: list[type[BaseException]] = []
+    for name in names:
+        candidate = getattr(module, name, None)
+        if isinstance(candidate, type) and issubclass(candidate, BaseException):
+            errors.append(candidate)
+    return tuple(errors)
+def _public_error_from_native(error: BaseException) -> Office2PdfError:
+    message = str(error)
+    match error.__class__.__name__:
+        case "UnsupportedFormatError":
+            return UnsupportedFormatError(message)
+        case "Office2PdfIoError":
+            return Office2PdfIoError(message)
+        case "Office2PdfParseError":
+            return Office2PdfParseError(message)
+        case "Office2PdfRenderError":
+            return Office2PdfRenderError(message)
+        case "UnsupportedEncryptionError":
+            return UnsupportedEncryptionError(message)
+        case "UnsupportedOptionError":
+            return UnsupportedOptionError(message)
+        case _:
+            return Office2PdfError(message)
+def _native_bytes_module() -> _NativeBytesModule:
+    module = import_module("office2pdf._native")
+    if not isinstance(module, _NativeBytesModule):
+        raise TypeError("office2pdf._native does not provide convert_bytes")
+    return module
+def _native_path_module() -> _NativePathModule:
+    module = import_module("office2pdf._native")
+    if not isinstance(module, _NativePathModule):
+        raise TypeError("office2pdf._native does not provide convert_path")
+    return module
+def _native_options(options: ConvertOptions | None) -> dict[str, object]:
+    return ConvertOptions().to_native() if options is None else options.to_native()
+def infer_format(path: str | Path) -> Format:
+    suffix = Path(path).suffix.lower().lstrip(".")
+    if not suffix:
+        raise ValueError("path has no extension; expected .docx, .pptx, or .xlsx")
+    return Format.from_value(suffix)
+def convert_bytes(
+    data: bytes | bytearray | memoryview,
+    format: Format | str,
+    options: ConvertOptions | None = None,
+) -> ConversionResult:
+    if not isinstance(data, (bytes, bytearray, memoryview)):
+        raise TypeError("data must be bytes-like")
+    payload = bytes(data)
+    if not payload:
+        raise ValueError("data must not be empty")
+    input_format = Format.from_value(format)
+    native = _native_bytes_module()
+    native_errors = _native_error_types()
+    try:
+        result = native.convert_bytes(payload, input_format.value, _native_options(options))
+    except native_errors as error:
+        raise _public_error_from_native(error) from error
+    if not isinstance(result, Mapping):
+        raise TypeError("native conversion result must be a mapping")
+    return _result_from_native(result)
+def convert_path(path: str | Path, options: ConvertOptions | None = None) -> ConversionResult:
+    input_path = Path(path)
+    infer_format(input_path)
+    native = _native_path_module()
+    native_errors = _native_error_types()
+    try:
+        result = native.convert_path(str(input_path), _native_options(options))
+    except native_errors as error:
+        raise _public_error_from_native(error) from error
+    if not isinstance(result, Mapping):
+        raise TypeError("native conversion result must be a mapping")
+    return _result_from_native(result)
+__all__ = [
+    "convert_bytes",
+    "convert_path",
+    "infer_format",
+    "_native_module",
+    "_native_options",
+    "_result_from_native",
+    "_warning_from_native",
+    "_metrics_from_native",
+]

office2pdf-python 0.1.0__tar.gz → 0.2.0__tar.gz

office2pdf-python 0.1.0.tar.gz → 0.2.0.tar.gz