msaas-data-export 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ node_modules/
2
+ dist/
3
+ .next/
4
+ .turbo/
5
+ *.pyc
6
+ __pycache__/
7
+ .venv/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+ .env
12
+ .env.*
13
+ !.env.example
14
+ !.env.*.example
15
+ !.env.*.template
16
+ .DS_Store
17
+ coverage/
18
+
19
+ # Runtime artifacts
20
+ logs_llm/
21
+ vectors.db
22
+ vectors.db-shm
23
+ vectors.db-wal
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: msaas-data-export
3
+ Version: 0.1.0
4
+ Summary: Data export and GDPR compliance library for the Willian SaaS platform
5
+ License: MIT
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: fastapi>=0.115.0
8
+ Requires-Dist: msaas-api-core
9
+ Requires-Dist: msaas-errors
10
+ Requires-Dist: pydantic>=2.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: httpx>=0.27.0; extra == 'dev'
13
+ Requires-Dist: openpyxl>=3.1.0; extra == 'dev'
14
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
15
+ Requires-Dist: pytest>=8.0; extra == 'dev'
16
+ Requires-Dist: reportlab>=4.0; extra == 'dev'
17
+ Provides-Extra: excel
18
+ Requires-Dist: openpyxl>=3.1.0; extra == 'excel'
19
+ Provides-Extra: pdf
20
+ Requires-Dist: reportlab>=4.0; extra == 'pdf'
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "msaas-data-export"
3
+ version = "0.1.0"
4
+ description = "Data export and GDPR compliance library for the Willian SaaS platform"
5
+ requires-python = ">=3.12"
6
+ license = { text = "MIT" }
7
+ dependencies = [
8
+ "msaas-api-core",
9
+ "msaas-errors",
10
+ "fastapi>=0.115.0",
11
+ "pydantic>=2.0",
12
+ ]
13
+
14
+ [project.optional-dependencies]
15
+ excel = ["openpyxl>=3.1.0"]
16
+ pdf = ["reportlab>=4.0"]
17
+ dev = [
18
+ "pytest>=8.0",
19
+ "pytest-asyncio>=0.24.0",
20
+ "httpx>=0.27.0",
21
+ "openpyxl>=3.1.0",
22
+ "reportlab>=4.0",
23
+ ]
24
+
25
+ [build-system]
26
+ requires = ["hatchling"]
27
+ build-backend = "hatchling.build"
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["src/data_export"]
31
+
32
+ [tool.pytest.ini_options]
33
+ asyncio_mode = "auto"
34
+ testpaths = ["tests"]
35
+
36
+ [tool.ruff]
37
+ target-version = "py312"
38
+ line-length = 100
39
+
40
+ [tool.uv.sources]
41
+ msaas-api-core = { workspace = true }
42
+ msaas-errors = { workspace = true }
@@ -0,0 +1,40 @@
1
+ """Willian Data Export -- Data export and GDPR compliance library."""
2
+
3
+ from data_export.bulk import BulkImporter
4
+ from data_export.config import ExportConfig, get_export_service, init_export
5
+ from data_export.formats.base import ExportOptions
6
+ from data_export.gdpr import DataSource, GDPRService
7
+ from data_export.models import (
8
+ ColumnConfig,
9
+ ExportFormat,
10
+ ExportRequest,
11
+ ExportResult,
12
+ GDPRRequest,
13
+ GDPRRequestStatus,
14
+ GDPRRequestType,
15
+ ImportError_,
16
+ ImportResult,
17
+ )
18
+ from data_export.router import ExportRouter
19
+ from data_export.service import ExportService
20
+
21
# Public API surface of the data_export package (alphabetically ordered).
__all__ = [
    "BulkImporter",
    "ColumnConfig",
    "DataSource",
    "ExportConfig",
    "ExportFormat",
    "ExportOptions",
    "ExportRequest",
    "ExportResult",
    "ExportRouter",
    "ExportService",
    "GDPRRequest",
    "GDPRRequestStatus",
    "GDPRRequestType",
    "GDPRService",
    "ImportError_",
    "ImportResult",
    "get_export_service",
    "init_export",
]
@@ -0,0 +1,145 @@
1
+ """Bulk import operations with validation and error reporting."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import io
7
+ import json
8
+ import logging
9
+ from collections.abc import Callable, Coroutine
10
+ from typing import Any
11
+
12
+ from pydantic import BaseModel, ValidationError
13
+
14
+ from data_export.models import ImportError_, ImportResult
15
+
16
# Module-scoped logger used by BulkImporter's import/dry-run summaries.
logger = logging.getLogger(__name__)
17
+
18
+
19
+ class BulkImporter:
20
+ """Import CSV or JSON data with per-row validation and error reporting."""
21
+
22
+ def validate_row(self, row: dict[str, Any], schema: type[BaseModel]) -> list[str]:
23
+ """Validate a single row against a Pydantic schema.
24
+
25
+ Args:
26
+ row: The data row to validate.
27
+ schema: Pydantic model class for validation.
28
+
29
+ Returns:
30
+ List of error messages. Empty list means the row is valid.
31
+ """
32
+ try:
33
+ schema.model_validate(row)
34
+ return []
35
+ except ValidationError as exc:
36
+ return [
37
+ f"{err['loc'][0] if err['loc'] else 'unknown'}: {err['msg']}"
38
+ for err in exc.errors()
39
+ ]
40
+
41
+ async def import_with_report(
42
+ self,
43
+ file_data: bytes | str,
44
+ schema: type[BaseModel],
45
+ process_fn: Callable[[dict[str, Any]], Coroutine[Any, Any, None]],
46
+ *,
47
+ file_format: str = "csv",
48
+ ) -> ImportResult:
49
+ """Import data with validation and processing, returning a detailed report.
50
+
51
+ Args:
52
+ file_data: Raw file content as bytes or string.
53
+ schema: Pydantic model class for row validation.
54
+ process_fn: Async callable to process each valid row.
55
+ file_format: Input format, either 'csv' or 'json'.
56
+
57
+ Returns:
58
+ ImportResult with success/error counts and error details.
59
+ """
60
+ rows = self._parse_file(file_data, file_format)
61
+ total = len(rows)
62
+ success = 0
63
+ errors: list[ImportError_] = []
64
+
65
+ for row_num, row in enumerate(rows, start=1):
66
+ validation_errors = self.validate_row(row, schema)
67
+
68
+ if validation_errors:
69
+ for msg in validation_errors:
70
+ field = msg.split(":")[0] if ":" in msg else None
71
+ errors.append(ImportError_(row=row_num, field=field, message=msg))
72
+ continue
73
+
74
+ try:
75
+ await process_fn(row)
76
+ success += 1
77
+ except Exception as exc:
78
+ errors.append(ImportError_(row=row_num, message=str(exc)))
79
+
80
+ logger.info(
81
+ "Bulk import completed: total=%d success=%d errors=%d", total, success, len(errors)
82
+ )
83
+ return ImportResult(total=total, success=success, errors=errors)
84
+
85
+ async def dry_run(
86
+ self,
87
+ file_data: bytes | str,
88
+ schema: type[BaseModel],
89
+ *,
90
+ file_format: str = "csv",
91
+ ) -> ImportResult:
92
+ """Validate all rows without importing, returning a validation report.
93
+
94
+ Args:
95
+ file_data: Raw file content.
96
+ schema: Pydantic model class for row validation.
97
+ file_format: Input format, either 'csv' or 'json'.
98
+
99
+ Returns:
100
+ ImportResult with validation results (no data is modified).
101
+ """
102
+ rows = self._parse_file(file_data, file_format)
103
+ total = len(rows)
104
+ success = 0
105
+ errors: list[ImportError_] = []
106
+
107
+ for row_num, row in enumerate(rows, start=1):
108
+ validation_errors = self.validate_row(row, schema)
109
+ if validation_errors:
110
+ for msg in validation_errors:
111
+ field = msg.split(":")[0] if ":" in msg else None
112
+ errors.append(ImportError_(row=row_num, field=field, message=msg))
113
+ else:
114
+ success += 1
115
+
116
+ logger.info("Dry run completed: total=%d valid=%d invalid=%d", total, success, len(errors))
117
+ return ImportResult(total=total, success=success, errors=errors)
118
+
119
+ def _parse_file(self, file_data: bytes | str, file_format: str) -> list[dict[str, Any]]:
120
+ """Parse file content into a list of row dictionaries.
121
+
122
+ Args:
123
+ file_data: Raw file content.
124
+ file_format: Either 'csv' or 'json'.
125
+
126
+ Returns:
127
+ List of row dictionaries.
128
+
129
+ Raises:
130
+ ValueError: If the format is unsupported or parsing fails.
131
+ """
132
+ if isinstance(file_data, bytes):
133
+ file_data = file_data.decode("utf-8")
134
+
135
+ match file_format.lower():
136
+ case "csv":
137
+ reader = csv.DictReader(io.StringIO(file_data))
138
+ return list(reader)
139
+ case "json":
140
+ parsed = json.loads(file_data)
141
+ if not isinstance(parsed, list):
142
+ raise ValueError("JSON import data must be an array of objects")
143
+ return parsed
144
+ case _:
145
+ raise ValueError(f"Unsupported import format: {file_format}")
@@ -0,0 +1,87 @@
1
+ """Export module configuration and initialization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from tempfile import gettempdir
7
+
8
+ from pydantic import BaseModel
9
+
10
+ from data_export.models import ExportFormat
11
+
12
+
13
# Module-level singleton state; mutated only by init_export(), set_config(),
# get_export_service(), and reset().
_config: ExportConfig | None = None
_service: object | None = None  # Lazy ref to avoid circular import
15
+
16
+
17
class ExportConfig(BaseModel):
    """Configuration for the data export module.

    Attributes:
        max_rows: Maximum rows allowed in a single export.
        default_format: Default export format when none is specified.
        temp_dir: Directory for temporary export files.
        gdpr_retention_days: How long (in days) to keep GDPR request records.
        streaming_chunk_size: Number of rows per chunk in streaming exports.
    """

    max_rows: int = 100_000
    default_format: ExportFormat = ExportFormat.CSV
    # NOTE(review): gettempdir() is resolved once at class-definition time, so
    # all instances share that default path — confirm this is intended.
    temp_dir: Path = Path(gettempdir())
    gdpr_retention_days: int = 90
    streaming_chunk_size: int = 1000
33
+
34
+
35
def init_export(config: ExportConfig | None = None) -> ExportConfig:
    """Initialize the export module with the given configuration.

    Any previously cached service instance is discarded so it will be
    rebuilt lazily against the new configuration.

    Args:
        config: Export configuration. Uses defaults if None.

    Returns:
        The active ExportConfig.
    """
    global _config, _service
    if config is None:
        config = ExportConfig()
    _config = config
    _service = None  # Drop the cached service; it is recreated on next access.
    return _config
48
+
49
+
50
def get_config() -> ExportConfig:
    """Return the current module configuration.

    Raises:
        RuntimeError: If init_export() has not been called.
    """
    config = _config
    if config is None:
        raise RuntimeError("Export module not initialized. Call init_export() first.")
    return config
59
+
60
+
61
def get_export_service():
    """Return or create the singleton ExportService.

    The service is created lazily on first access and cached; init_export()
    clears the cache so a replacement picks up fresh configuration.

    Raises:
        RuntimeError: If init_export() has not been called.
    """
    global _service
    # Delegate the initialization check so the error message stays in one place.
    config = get_config()
    if _service is None:
        # Imported here to avoid a circular import at module load time.
        from data_export.service import ExportService

        _service = ExportService(config)
    return _service
75
+
76
+
77
def set_config(config: ExportConfig) -> None:
    """Override the active configuration (useful for testing).

    Unlike init_export(), the cached service is left untouched.
    """
    global _config
    _config = config
81
+
82
+
83
def reset() -> None:
    """Clear all module state so tests start from a clean slate."""
    global _config, _service
    _service = None
    _config = None
@@ -0,0 +1,62 @@
1
+ """Export format implementations."""
2
+
3
+ from data_export.formats.base import BaseExportFormat, ExportOptions
4
+ from data_export.formats.csv_format import CSVExportFormat
5
+ from data_export.formats.json_format import JSONExportFormat
6
+
7
# Re-exported public names; get_format() is the factory entry point.
__all__ = [
    "BaseExportFormat",
    "CSVExportFormat",
    "ExportOptions",
    "JSONExportFormat",
    "get_format",
]
14
+
15
+
16
def get_format(format_name: str) -> BaseExportFormat:
    """Return the appropriate format handler for the given format name.

    Args:
        format_name: One of 'csv', 'json', 'excel', 'pdf'.

    Returns:
        An instance of the corresponding format handler.

    Raises:
        ValueError: If the format is not supported or its optional dependency is missing.
    """
    if format_name == "csv":
        return CSVExportFormat()
    if format_name == "json":
        return JSONExportFormat()
    if format_name == "excel":
        # Deferred construction: raises ValueError when openpyxl is absent.
        return _get_excel_format()
    if format_name == "pdf":
        # Deferred construction: raises ValueError when reportlab is absent.
        return _get_pdf_format()
    raise ValueError(f"Unsupported export format: {format_name}")
39
+
40
+
41
+ def _get_excel_format() -> BaseExportFormat:
42
+ """Load Excel format, raising a clear error if openpyxl is missing."""
43
+ try:
44
+ from data_export.formats.excel import ExcelExportFormat
45
+
46
+ return ExcelExportFormat()
47
+ except ImportError:
48
+ raise ValueError(
49
+ "Excel export requires openpyxl. Install with: pip install willian-data-export[excel]"
50
+ ) from None
51
+
52
+
53
+ def _get_pdf_format() -> BaseExportFormat:
54
+ """Load PDF format, raising a clear error if reportlab is missing."""
55
+ try:
56
+ from data_export.formats.pdf_format import PDFExportFormat
57
+
58
+ return PDFExportFormat()
59
+ except ImportError:
60
+ raise ValueError(
61
+ "PDF export requires reportlab. Install with: pip install willian-data-export[pdf]"
62
+ ) from None
@@ -0,0 +1,84 @@
1
+ """Abstract base class for export formats."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any
7
+
8
+ from pydantic import BaseModel
9
+
10
+ from data_export.models import ColumnConfig
11
+
12
+
13
class ExportOptions(BaseModel):
    """Options that control export behavior.

    Attributes:
        columns: Column configuration for field selection and renaming;
            None means "export every key of the first row".
        pretty: Enable human-readable formatting where applicable (e.g., JSON indent).
        delimiter: Field delimiter for CSV exports.
        encoding: Character encoding for text-based exports.
        bom: Include a byte-order mark for Excel CSV compatibility.
        sheet_name: Worksheet name for Excel exports.
    """

    columns: list[ColumnConfig] | None = None
    pretty: bool = False
    delimiter: str = ","
    encoding: str = "utf-8"
    bom: bool = False
    sheet_name: str = "Sheet1"
31
+
32
+
33
class BaseExportFormat(ABC):
    """Abstract base for all export format implementations.

    Concrete formats implement `export()` and expose `content_type` and
    `extension`; `apply_columns()` provides the shared column projection.
    """

    @property
    @abstractmethod
    def content_type(self) -> str:
        """MIME type for this format (e.g. 'text/csv')."""

    @property
    @abstractmethod
    def extension(self) -> str:
        """File extension without dot (e.g. 'csv')."""

    @abstractmethod
    def export(self, data: list[dict[str, Any]], options: ExportOptions | None = None) -> bytes:
        """Export data rows to bytes in this format.

        Args:
            data: List of dictionaries, each representing a row.
            options: Export options controlling columns, formatting, etc.

        Returns:
            The exported file content as bytes.
        """

    def apply_columns(
        self, data: list[dict[str, Any]], options: ExportOptions | None
    ) -> tuple[list[dict[str, Any]], list[str]]:
        """Apply column selection and renaming to the data.

        Args:
            data: Raw data rows.
            options: Export options with optional column config.

        Returns:
            Tuple of (transformed rows, ordered header names).
        """
        if not data:
            return [], []

        column_config = options.columns if options else None
        if not column_config:
            # No column config: the first row's keys define the headers.
            return data, list(data[0].keys())

        header_field_pairs = [(col.display_header, col.field) for col in column_config]
        headers = [header for header, _field in header_field_pairs]
        projected = [
            {header: record.get(field) for header, field in header_field_pairs}
            for record in data
        ]
        return projected, headers
@@ -0,0 +1,56 @@
1
+ """CSV export format implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import io
7
+ from typing import Any
8
+
9
+ from data_export.formats.base import BaseExportFormat, ExportOptions
10
+
11
+
12
class CSVExportFormat(BaseExportFormat):
    """CSV exporter supporting custom delimiters, encodings, and an optional BOM."""

    @property
    def content_type(self) -> str:
        return "text/csv"

    @property
    def extension(self) -> str:
        return "csv"

    def export(self, data: list[dict[str, Any]], options: ExportOptions | None = None) -> bytes:
        """Serialize data rows to CSV bytes.

        Args:
            data: List of row dictionaries.
            options: Controls delimiter, encoding, BOM, and column mapping.

        Returns:
            CSV file content as bytes.
        """
        opts = options or ExportOptions()
        rows, headers = self.apply_columns(data, opts)

        if not headers:
            return b""

        buffer = io.StringIO()
        writer = csv.DictWriter(
            buffer,
            fieldnames=headers,
            delimiter=opts.delimiter,
            extrasaction="ignore",
        )
        writer.writeheader()
        writer.writerows(rows)

        payload = buffer.getvalue().encode(opts.encoding)

        # Prepend the UTF-8 BOM only when the encoding really is UTF-8
        # (covers spellings like "utf-8", "UTF8"); Excel uses it to detect
        # UTF-8 CSV files.
        is_utf8 = opts.encoding.lower().replace("-", "") == "utf8"
        return b"\xef\xbb\xbf" + payload if (opts.bom and is_utf8) else payload
@@ -0,0 +1,97 @@
1
+ """Excel export format implementation using openpyxl."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ from typing import Any
7
+
8
+ from data_export.formats.base import BaseExportFormat, ExportOptions
9
+
10
# Optional dependency: Excel support needs openpyxl (the "excel" extra).
# HAS_OPENPYXL gates ExcelExportFormat.__init__ so a missing install fails
# with a helpful message instead of an unguarded ImportError.
try:
    import openpyxl
    from openpyxl.utils import get_column_letter

    HAS_OPENPYXL = True
except ImportError:
    HAS_OPENPYXL = False
17
+
18
+
19
class ExcelExportFormat(BaseExportFormat):
    """Export data as an Excel .xlsx file with header styling and auto-width columns."""

    def __init__(self) -> None:
        if not HAS_OPENPYXL:
            # The distribution is named msaas-data-export (see pyproject.toml),
            # so the install hint must reference that name.
            raise ImportError(
                "openpyxl is required for Excel export. "
                "Install with: pip install msaas-data-export[excel]"
            )

    @property
    def content_type(self) -> str:
        return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    @property
    def extension(self) -> str:
        return "xlsx"

    def export(self, data: list[dict[str, Any]], options: ExportOptions | None = None) -> bytes:
        """Export data rows to Excel bytes.

        Args:
            data: List of row dictionaries.
            options: Controls column mapping, sheet name, and column widths.

        Returns:
            Excel file content as bytes.
        """
        # Imported lazily so this module stays importable without openpyxl;
        # the constructor guarantees openpyxl is present by the time we get here.
        from openpyxl.styles import Font

        opts = options or ExportOptions()
        transformed, headers = self.apply_columns(data, opts)

        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = opts.sheet_name

        # Header row in bold so readers can tell headers from data.
        bold_font = Font(bold=True)
        for col_idx, header in enumerate(headers, start=1):
            cell = ws.cell(row=1, column=col_idx, value=header)
            cell.font = bold_font

        # Data rows start at row 2, directly below the header row.
        for row_idx, row in enumerate(transformed, start=2):
            for col_idx, header in enumerate(headers, start=1):
                ws.cell(row=row_idx, column=col_idx, value=row.get(header))

        # Size columns from explicit config or content length.
        self._auto_width(ws, headers, transformed, opts)

        buffer = io.BytesIO()
        wb.save(buffer)
        return buffer.getvalue()

    def _auto_width(
        self,
        ws: Any,
        headers: list[str],
        data: list[dict[str, Any]],
        opts: ExportOptions,
    ) -> None:
        """Set column widths from explicit ColumnConfig.width or content length.

        Explicit widths win; otherwise the width is the longest stringified
        value (sampled from the first 100 rows for performance), padded by 2
        and capped at 50 characters.
        """
        col_configs = {col.display_header: col for col in (opts.columns or [])}

        for col_idx, header in enumerate(headers, start=1):
            config = col_configs.get(header)
            if config and config.width:
                ws.column_dimensions[get_column_letter(col_idx)].width = config.width
                continue

            # Fall back to content-derived width.
            max_len = len(str(header))
            for row in data[:100]:  # Sample first 100 rows for performance
                val = row.get(header)
                if val is not None:
                    max_len = max(max_len, len(str(val)))

            ws.column_dimensions[get_column_letter(col_idx)].width = min(max_len + 2, 50)