PyPI - rdetoolkit - Versions diffs - 1.5.1__cp312-cp312-win_amd64.whl → 1.5.3__cp312-cp312-win_amd64.whl - Mend

rdetoolkit 1.5.1__cp312-cp312-win_amd64.whl → 1.5.3__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

rdetoolkit/__init__.py +19 -1
rdetoolkit/cli/validate.py +84 -55
rdetoolkit/cmd/validate.py +18 -15
rdetoolkit/config.py +211 -23
rdetoolkit/core.cp312-win_amd64.pyd +0 -0
rdetoolkit/exceptions.py +94 -0
rdetoolkit/exceptions.pyi +21 -0
rdetoolkit/invoicefile.py +11 -9
rdetoolkit/models/metadata.py +97 -2
rdetoolkit/models/metadata.pyi +22 -1
rdetoolkit/processing/processors/invoice.py +41 -0
rdetoolkit/validation.py +94 -8
rdetoolkit/validation.pyi +5 -0
rdetoolkit/workflows.py +8 -2
{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/METADATA +17 -9
{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/RECORD +19 -19
{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/WHEEL +0 -0
{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/entry_points.txt +0 -0
{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/licenses/LICENSE +0 -0

rdetoolkit/exceptions.py CHANGED Viewed

@@ -176,3 +176,97 @@ class InvalidSearchParametersError(Exception):
     def __init__(self, message: str = "Invalid search term") -> None:
         self.message = message
         super().__init__(self.message)
+class ConfigError(Exception):
+    """Exception raised for configuration file loading errors.
+    This exception provides structured, informative error messages for configuration
+    file failures, including file paths, error types, line/column information for
+    parse errors, and documentation links.
+    Attributes:
+        message: The error message describing what went wrong.
+        file_path: Path to the configuration file that failed to load.
+        error_type: Type of error (e.g., 'file_not_found', 'parse_error', 'validation_error').
+        line_number: Line number where error occurred (for parse errors).
+        column_number: Column number where error occurred (for parse errors).
+        field_name: Field name that failed validation (for validation errors).
+        doc_url: Documentation URL for help and troubleshooting.
+    Examples:
+        File not found error:
+        >>> raise ConfigError(
+        ...     "Configuration file not found",
+        ...     file_path="config.yaml",
+        ...     error_type="file_not_found"
+        ... )
+        Parse error with line information:
+        >>> raise ConfigError(
+        ...     "Invalid YAML syntax: expected <block end>",
+        ...     file_path="config.yaml",
+        ...     error_type="parse_error",
+        ...     line_number=10,
+        ...     column_number=5
+        ... )
+        Validation error with field information:
+        >>> raise ConfigError(
+        ...     "Invalid value for field",
+        ...     file_path="config.yaml",
+        ...     error_type="validation_error",
+        ...     field_name="system.extended_mode"
+        ... )
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        file_path: str | None = None,
+        error_type: str = "unknown",
+        line_number: int | None = None,
+        column_number: int | None = None,
+        field_name: str | None = None,
+        doc_url: str = "https://nims-mdpf.github.io/rdetoolkit/usage/config/config/",
+    ) -> None:
+        """Initialize ConfigError with detailed information.
+        Args:
+            message: The error message describing what went wrong.
+            file_path: Path to the configuration file that failed.
+            error_type: Type of error (e.g., 'file_not_found', 'parse_error', 'validation_error').
+            line_number: Line number where error occurred (for parse errors).
+            column_number: Column number where error occurred (for parse errors).
+            field_name: Field name that failed validation (for validation errors).
+            doc_url: Documentation URL for help and troubleshooting.
+        """
+        self.message = message
+        self.file_path = file_path
+        self.error_type = error_type
+        self.line_number = line_number
+        self.column_number = column_number
+        self.field_name = field_name
+        self.doc_url = doc_url
+        # Build comprehensive error message
+        parts = []
+        if file_path:
+            parts.append(f"Configuration file: '{file_path}'")
+        parts.append(message)
+        if line_number is not None:
+            location = f"line {line_number}"
+            if column_number is not None:
+                location += f", column {column_number}"
+            parts.append(f"Location: {location}")
+        if field_name:
+            parts.append(f"Field: {field_name}")
+        parts.append(f"See: {doc_url}")
+        full_message = "\n".join(parts)
+        super().__init__(full_message)

rdetoolkit/exceptions.pyi CHANGED Viewed

@@ -57,3 +57,24 @@ class NoResultsFoundError(Exception):
 class InvalidSearchParametersError(Exception):
     message: Incomplete
     def __init__(self, message: str = 'Invalid search term') -> None: ...
+class ConfigError(Exception):
+    message: str
+    file_path: str | None
+    error_type: str
+    line_number: int | None
+    column_number: int | None
+    field_name: str | None
+    doc_url: str
+    def __init__(
+        self,
+        message: str,
+        *,
+        file_path: str | None = None,
+        error_type: str = 'unknown',
+        line_number: int | None = None,
+        column_number: int | None = None,
+        field_name: str | None = None,
+        doc_url: str = 'https://nims-mdpf.github.io/rdetoolkit/usage/config/config/',
+    ) -> None: ...

rdetoolkit/invoicefile.py CHANGED Viewed

@@ -160,16 +160,18 @@ def check_exist_rawfiles(dfexcelinvoice: pd.DataFrame, excel_rawfiles: list[Path
         excel_rawfiles (list[Path]): A list of Path objects representing file paths.
     Raises:
-        tructuredError: If any file name in dfexcelinvoice is not found in excel_rawfiles.
+        StructuredError: If any file name in dfexcelinvoice is not found in excel_rawfiles.
     Returns:
         list[Path]: A list of Path objects corresponding to the file names in dfexcelinvoice, ordered as they appear in the DataFrame.
     """
-    file_set_group = {f.name for f in excel_rawfiles}
-    file_set_invoice = set(dfexcelinvoice["data_file_names/name"])
-    if file_set_invoice - file_set_group:
-        emsg = f"ERROR: raw file not found: {(file_set_invoice-file_set_group).pop()}"
-        raise StructuredError(emsg)
+    file_set_group = {f.name for f in excel_rawfiles}
+    file_set_invoice = set(dfexcelinvoice["data_file_names/name"])
+    if file_set_invoice - file_set_group:
+        missing = sorted(file_set_invoice - file_set_group)
+        missing_display = (str(name) for name in missing)
+        emsg = f"ERROR: raw file not found: {', '.join(missing_display)}"
+        raise StructuredError(emsg)
     # Sort excel_rawfiles in the order they appear in the invoice
     _tmp = {f.name: f for f in excel_rawfiles}
     try:
@@ -1152,7 +1154,8 @@ class RuleBasedReplacer:
         Args:
             replacements (Mapping[str, Any]): The object containing mapping rules (read-only).
             source_json_obj (MutableMapping[str, Any] | None): Objects of key and value to which you want to apply the rule (performs nested assignments).
-            mapping_rules (Mapping[str, str] | None, optional): Rules for mapping key and value (read-only). Defaults to None.
+            mapping_rules (Mapping[str, str] | None, optional): Rules for mapping key and value (read-only).
+                If None, uses self.rules. Defaults to None.
         Returns:
             dict[str, Any]: dictionary type data after conversion
@@ -1173,13 +1176,12 @@ class RuleBasedReplacer:
             result = replacer.apply_rules(replacement_rule, save_file_path, mapping_rules = rule)
             print(result)
         """
-        # [TODO] Correction of type definitions in version 0.1.6
         if mapping_rules is None:
             mapping_rules = self.rules
         if source_json_obj is None:
             source_json_obj = {}
-        for key, value in self.rules.items():
+        for key, value in mapping_rules.items():
             keys = key.split(".")
             replace_value = replacements.get(value, "")
             current_obj: MutableMapping[str, Any] = source_json_obj

rdetoolkit/models/metadata.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 from typing import Any, Final
-from pydantic import BaseModel, RootModel, field_validator
+from pydantic import AnyUrl, BaseModel, ConfigDict, Field, RootModel, field_validator
 MAX_VALUE_SIZE: Final[int] = 1024
@@ -71,7 +71,7 @@ class ValidableItems(RootModel):
 class MetadataItem(BaseModel):
-    """metadata-def.json class.
+    """metadata.json class.
     Stores metadata extracted by the data structuring process.
@@ -82,3 +82,98 @@ class MetadataItem(BaseModel):
     constant: dict[str, MetaValue]
     variable: ValidableItems
+class NameField(BaseModel):
+    """Multilingual name field for metadata definition.
+    Attributes:
+        ja: Japanese name
+        en: English name
+    """
+    ja: str
+    en: str
+class SchemaField(BaseModel):
+    """Schema field for metadata definition.
+    Attributes:
+        type: Type of the metadata value. One of "array", "boolean", "integer", "number", "string"
+        format: Optional format specifier. One of "date-time" or "duration"
+    """
+    type: str  # "array", "boolean", "integer", "number", "string"
+    format: str | None = None  # "date-time", "duration"
+class MetadataDefEntry(BaseModel):
+    """Single metadata definition entry in metadata-def.json.
+    Represents one metadata item definition. This is used for metadata-def.json,
+    not for metadata.json (which uses MetadataItem instead).
+    Attributes:
+        name: Multilingual name (ja/en required)
+        schema_field: Type and format definition (type required, serialized as "schema")
+        unit: Optional unit for the metadata value
+        description: Optional description
+        uri: Optional URI/URL for the metadata key
+        mode: Optional measurement mode
+        order: Optional display order
+        original_name: Optional original name (serialized as "originalName")
+    Example:
+        ```json
+        {
+            "temperature": {
+                "name": {"ja": "温度", "en": "Temperature"},
+                "schema": {"type": "number"},
+                "unit": "K"
+            }
+        }
+        ```
+    """
+    name: NameField
+    schema_field: SchemaField = Field(alias="schema")
+    unit: str | None = None
+    description: str | None = None
+    uri: AnyUrl | None = None
+    mode: str | None = None
+    order: int | None = None
+    original_name: str | None = Field(default=None, alias="originalName")
+    model_config = ConfigDict(
+        # Allow undefined fields (e.g., "variable" field is ignored per docs)
+        extra="allow",
+        # Enable alias for JSON parsing and serialization
+        populate_by_name=True,
+    )
+class MetadataDefinition(RootModel):
+    """metadata-def.json root model.
+    Represents the entire metadata definition file as a dictionary
+    mapping metadata keys to their definitions. This is used for
+    metadata-def.json, not for metadata.json (which uses MetadataItem instead).
+    Example:
+        ```json
+        {
+            "temperature": {
+                "name": {"ja": "温度", "en": "Temperature"},
+                "schema": {"type": "number"},
+                "unit": "K"
+            },
+            "operator": {
+                "name": {"ja": "測定者", "en": "Operator"},
+                "schema": {"type": "string"}
+            }
+        }
+        ```
+    """
+    root: dict[str, MetadataDefEntry]

rdetoolkit/models/metadata.pyi CHANGED Viewed

@@ -1,4 +1,4 @@
-from pydantic import BaseModel, RootModel
+from pydantic import AnyUrl, BaseModel, RootModel
 from typing import Any, Final
 MAX_VALUE_SIZE: Final[int]
@@ -20,3 +20,24 @@ class ValidableItems(RootModel):
 class MetadataItem(BaseModel):
     constant: dict[str, MetaValue]
     variable: ValidableItems
+class NameField(BaseModel):
+    ja: str
+    en: str
+class SchemaField(BaseModel):
+    type: str
+    format: str | None
+class MetadataDefEntry(BaseModel):
+    name: NameField
+    schema_field: SchemaField
+    unit: str | None
+    description: str | None
+    uri: AnyUrl | None
+    mode: str | None
+    order: int | None
+    original_name: str | None
+class MetadataDefinition(RootModel):
+    root: dict[str, MetadataDefEntry]

rdetoolkit/processing/processors/invoice.py CHANGED Viewed

@@ -411,6 +411,7 @@ class SmartTableInvoiceInitializer(Processor):
         """Apply SmartTable row data to invoice and collect metadata updates."""
         metadata_updates: dict[str, dict[str, Any]] = {}
         metadata_def: dict[str, Any] | None = None
+        csv_has_sample_owner_id = False
         # Handle empty CSV (no data rows)
         if len(csv_data) == 0:
@@ -435,10 +436,50 @@ class SmartTableInvoiceInitializer(Processor):
                 meta_key, meta_entry = self._process_meta_mapping(col, value, metadata_def)
                 metadata_updates[meta_key] = meta_entry
                 continue
+            # Track if sample/ownerId is explicitly specified in CSV
+            if col == "sample/ownerId":
+                csv_has_sample_owner_id = True
             self._process_mapping_key(col, value, invoice_data, invoice_schema_json_data)
+        # Set sample.ownerId to basic.dataOwnerId only if not specified in CSV
+        if not csv_has_sample_owner_id:
+            self._set_sample_owner_id(invoice_data)
         return metadata_updates
+    def _set_sample_owner_id(self, invoice_data: dict[str, Any]) -> None:
+        """Set sample.ownerId to basic.dataOwnerId for SmartTable processing.
+        This ensures that newly registered samples have the correct owner ID,
+        which should always be the data owner (registrant) rather than
+        any temporary sample owner selected in the invoice screen.
+        Args:
+            invoice_data: Invoice data dictionary to update.
+        Note:
+            - For new sample registration: Sets the correct owner ID
+            - For sample linking: The value is set but not used (safe to set)
+            - If basic.dataOwnerId is missing: Logs warning and preserves existing value
+        """
+        basic_section = invoice_data.get("basic", {})
+        data_owner_id = basic_section.get("dataOwnerId")
+        if data_owner_id is None or data_owner_id == "":
+            logger.warning(
+                "basic.dataOwnerId is missing or empty; sample.ownerId will not be updated. "
+                "This may cause incorrect sample owner assignment.",
+            )
+            return
+        sample_section = invoice_data.setdefault("sample", {})
+        sample_section["ownerId"] = data_owner_id
+        logger.debug(
+            "Set sample.ownerId to basic.dataOwnerId: %s",
+            data_owner_id,
+        )
     def _load_metadata_definition(self, metadata_def_path: Path) -> dict[str, Any]:
         """Load metadata definitions for SmartTable meta column processing.

rdetoolkit/validation.py CHANGED Viewed

@@ -29,7 +29,19 @@ def _pydantic_validation_error() -> type[PydanticValidationError]:
 class MetadataValidator:
+    """Validator for metadata files (metadata.json).
+    This validator checks metadata.json files against the
+    MetadataItem Pydantic model, ensuring proper structure
+    for actual metadata data.
+    Note:
+        This is separate from MetadataDefinitionValidator which validates
+        metadata-def.json files (metadata definitions).
+    """
     def __init__(self) -> None:
+        """Initialize metadata validator with schema."""
         from rdetoolkit.models.metadata import MetadataItem
         self.schema = MetadataItem
@@ -59,28 +71,102 @@ class MetadataValidator:
         if path is not None:
             __data = readf_json(path)
-        elif json_obj is not None:
-            __data = json_obj
         else:
-            emsg = "Unexpected validation error"
-            raise ValueError(emsg)
+            __data = json_obj
         self.schema(**__data)
         return __data
+class MetadataDefinitionValidator:
+    """Validator for metadata definition files (metadata-def.json).
+    This validator checks metadata-def.json files against the
+    MetadataDefinition Pydantic model, ensuring proper structure
+    for metadata definitions.
+    Note:
+        This is separate from MetadataValidator which validates
+        metadata.json files (actual metadata data).
+    """
+    def __init__(self) -> None:
+        """Initialize metadata definition validator with schema."""
+        from rdetoolkit.models.metadata import MetadataDefinition
+        self.schema = MetadataDefinition
+    def validate(
+        self,
+        *,
+        path: str | Path | None = None,
+        json_obj: dict[str, Any] | None = None,
+    ) -> dict[str, Any]:
+        """Validate metadata definition JSON against schema.
+        Args:
+            path: Path to metadata-def.json file to validate
+            json_obj: JSON object to validate (alternative to path)
+        Returns:
+            Validated JSON data as dict
+        Raises:
+            ValueError: If neither path nor json_obj is provided, or if both are provided
+            MetadataValidationError: If validation fails with detailed error info
+        Examples:
+            >>> validator = MetadataDefinitionValidator()
+            >>> data = validator.validate(path="metadata-def.json")
+        """
+        # Input validation
+        if path is None and json_obj is None:
+            emsg = "At least one of 'path' or 'json_obj' must be provided"
+            raise ValueError(emsg)
+        if path is not None and json_obj is not None:
+            emsg = "Both 'path' and 'json_obj' cannot be provided at the same time"
+            raise ValueError(emsg)
+        # Load data
+        if path is not None:
+            __data = readf_json(path)
+        else:
+            __data = json_obj
+        # Validate with Pydantic model
+        try:
+            self.schema(__data)
+        except _pydantic_validation_error() as validation_error:
+            # Format error message for metadata-def.json
+            emsg = "Validation Errors in metadata-def.json. Please correct the following fields\n"
+            for idx, error in enumerate(validation_error.errors(), start=1):
+                # Join the error location (e.g., ('key', 'name', 'ja')) into a dotted field path ("key.name.ja")
+                field_path = ".".join([str(e) for e in error["loc"]])
+                emsg += f"{idx}. Field: {field_path}\n"
+                emsg += f"   Type: {error['type']}\n"
+                emsg += f"   Context: {error['msg']}\n"
+            raise MetadataValidationError(emsg) from validation_error
+        return __data
 def metadata_validate(path: str | Path) -> None:
     """Validate metadata.json file.
-    This function validates the metadata definition file specified by the given path.
-    It checks if the file exists and then uses a validator to validate the file against a schema.
+    This function validates the metadata.json file (actual metadata data)
+    specified by the given path. It checks if the file exists and then uses
+    MetadataValidator to validate the file against the MetadataItem schema.
+    Note:
+        This function is for metadata.json files. For metadata-def.json
+        files, use MetadataDefinitionValidator instead.
     Args:
-        path (Union[str, Path]): The path to the metadata definition file.
+        path (Union[str, Path]): The path to the metadata.json file.
     Raises:
         FileNotFoundError: If the schema and path do not exist.
-        MetadataValidationError: If there is an error in validating the metadata definition file.
+        MetadataValidationError: If there is an error in validating the metadata file.
     """
     if isinstance(path, str):
         path = Path(path)

rdetoolkit/validation.pyi CHANGED Viewed

@@ -8,6 +8,11 @@ class MetadataValidator:
     def __init__(self) -> None: ...
     def validate(self, *, path: str | Path | None = None, json_obj: dict[str, Any] | None = None) -> dict[str, Any]: ...
+class MetadataDefinitionValidator:
+    schema: Incomplete
+    def __init__(self) -> None: ...
+    def validate(self, *, path: str | Path | None = None, json_obj: dict[str, Any] | None = None) -> dict[str, Any]: ...
 def metadata_validate(path: str | Path) -> None: ...
 class InvoiceValidator:

rdetoolkit/workflows.py CHANGED Viewed

@@ -419,8 +419,14 @@ def run(*, custom_dataset_function: DatasetCallback | None = None, config: Confi
         Exception: If a generic error occurs during the process.
     Note:
-        If `extended_mode` is specified, the evaluation of the execution mode is performed in the order of `extended_mode -> excelinvoice -> invoice`,
-        and the structuring process is executed.
+        Execution mode is selected in the following order:
+        1. SmartTable CSV is present (`smarttable_file` is not None) -> `SmartTableInvoice` mode.
+        2. Excel invoice bundle is provided (`excel_invoice_files` is not None) -> `Excelinvoice` mode.
+        3. `extended_mode` matches (case-insensitive) `rdeformat` or `MultiDataTile` -> the corresponding extended mode.
+        4. Otherwise -> `Invoice` mode.
+        The mode name recorded in logs/results matches the branch that executed. No `excelinvoice` value is accepted in `extended_mode`.
     Example:
         ```python

{rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rdetoolkit
-Version: 1.5.1
+Version: 1.5.3
 Classifier: Development Status :: 3 - Alpha
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
@@ -36,6 +36,7 @@ Requires-Dist: pyarrow>=19.0.0
 Requires-Dist: pip>=24.3.1
 Requires-Dist: rpds-py>=0.26
 Requires-Dist: markdown>=3.7
+Requires-Dist: pytz>=2024.1
 Requires-Dist: types-pytz>=2025.2.0.20250326
 Requires-Dist: matplotlib>=3.9.4
 Requires-Dist: minio>=7.2.15 ; extra == 'minio'
@@ -51,11 +52,11 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
 Project-URL: Bug Tracker, https://github.com/nims-dpfc/rdetoolkit
 Project-URL: Homepage, https://github.com/nims-dpfc/rdetoolkit
-![GitHub Release](https://img.shields.io/github/v/release/nims-dpfc/rdetoolkit)
+![GitHub Release](https://img.shields.io/github/v/release/nims-mdpf/rdetoolkit)
 [![python.org](https://img.shields.io/badge/Python-3.9%7C3.10%7C3.11%7C3.12%7C3.13%7C3.14-%233776AB?logo=python)](https://www.python.org/downloads/release/python-3917/)
-[![MIT License](https://img.shields.io/badge/license-MIT-green)](https://github.com/nims-dpfc/rdetoolkit/blob/main/LICENSE)
-[![Issue](https://img.shields.io/badge/issue_tracking-github-orange)](https://github.com/nims-dpfc/rdetoolkit/issues)
-![workflow](https://github.com/nims-dpfc/rdetoolkit/actions/workflows/main.yml/badge.svg)
+[![MIT License](https://img.shields.io/badge/license-MIT-green)](https://github.com/nims-mdpf/rdetoolkit/blob/main/LICENSE)
+[![Issue](https://img.shields.io/badge/issue_tracking-github-orange)](https://github.com/nims-mdpf/rdetoolkit/issues)
+![workflow](https://github.com/nims-mdpf/rdetoolkit/actions/workflows/main.yml/badge.svg)
 ![coverage](docs/img/coverage.svg)
 > [日本語ドキュメント](docs/README_ja.md)
@@ -76,6 +77,13 @@ If you wish to make changes, please read the following document first:
 - [CONTRIBUTING.md](https://github.com/nims-mdpf/rdetoolkit/blob/main/CONTRIBUTING.md)
+## Requirements
+- **Python**: 3.9 or higher (Python 3.9 support will be removed in v2.0; upgrading to Python 3.10+ is recommended)
+> **Warning: Python 3.9 deprecation.**
+> Python 3.9 support is deprecated and will be removed in rdetoolkit v2.0. While Python 3.9 continues to work in rdetoolkit 1.x, users will see a `DeprecationWarning` on import. Please plan to upgrade to Python 3.10 or later before the v2.0 release.
 ## Install
 To install, run the following command:
@@ -143,20 +151,20 @@ def dataset(paths: RdeDatasetPaths) -> None:
     ...
 ```
-In this example, we define a dummy function `display_messsage()` under `modules` to demonstrate how to implement custom structuring processing. Create a file named `modules/modules.py` as follows:
+In this example, we define a dummy function `display_message()` under `modules` to demonstrate how to implement custom structuring processing. Create a file named `modules/modules.py` as follows:
 ```python
 # modules/modules.py
 from rdetoolkit.models.rde2types import RdeDatasetPaths
-def display_messsage(path):
+def display_message(path):
     print(f"Test Message!: {path}")
 def dataset(paths: RdeDatasetPaths) -> None:
-    display_messsage(paths.inputdata)
-    display_messsage(paths.struct)
+    display_message(paths.inputdata)
+    display_message(paths.struct)
 ```
 ### About the Entry Point