PyPI - dragon-ml-toolbox - Versions diffs - 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl - Mend

dragon-ml-toolbox 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (8) hide show

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 9.0.0
+Version: 9.1.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-dragon_ml_toolbox-9.0.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-9.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
-ml_tools/ETL_engineering.py,sha256=SH8b9BSR79cib49YpIixjayaruD0qftnW7FV3xskoOs,44876
+dragon_ml_toolbox-9.1.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-9.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+ml_tools/ETL_engineering.py,sha256=c-YyhfuNglUhDBbTN71-iHozWL7Y9E8KqNNS5hK1nA4,44883
 ml_tools/GUI_tools.py,sha256=kEQWg-bog3pB5tI22gMGKWaCGHnz9TB2Lvvfhf5F2CI,45412
 ml_tools/MICE_imputation.py,sha256=kVSythWfxJFR4-2mtcYCWQaQ1Oz5yyx_SJu5gjnS7H8,11670
 ml_tools/ML_callbacks.py,sha256=JPvEw_cW5tYNJ2rMSgnNrKLuni_UrmuhDFaOw-u2SvA,13926
@@ -24,12 +24,12 @@ ml_tools/data_exploration.py,sha256=hKA_3U-piJ8TtDWhzX_T2Awkg-25e0DC5E8qloqPo6w,
 ml_tools/ensemble_evaluation.py,sha256=xMEMfXJ5MjTkTfr1LkFOeD7iUtnVDCW3S9lm3zT-6tY,24778
 ml_tools/ensemble_inference.py,sha256=EFHnbjbu31fcVp88NBx8lWAVdu2Gpg9MY9huVZJHFfM,9350
 ml_tools/ensemble_learning.py,sha256=3s0kH4i_naj0IVl_T4knst-Hwg4TScWjEdsXX5KAi7I,21929
-ml_tools/handle_excel.py,sha256=p5BpBS9vhBhz3lqkk_WQ9Ef7EGedf2dp2cl0yekeRy4,13065
+ml_tools/handle_excel.py,sha256=He4UT15sCGhaG-JKfs7uYVAubxWjrqgJ6U7OhMR2fuE,14005
 ml_tools/keys.py,sha256=HtPG8-MWh89C32A7eIlfuuA-DLwkxGkoDfwR2TGN9CQ,1074
 ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
 ml_tools/path_manager.py,sha256=TJgoqMAryc5F0dal8W_zvJgE1TpOzlskIyYJk614WW4,13809
 ml_tools/utilities.py,sha256=zzfYR7SUSb2rZILTNoCjl_pfLlPdHf4263atXuEb3iE,19341
-dragon_ml_toolbox-9.0.0.dist-info/METADATA,sha256=FWDN8U9RARbPxbCBVrv4ZHqJys-LVo7M3dlyVwKdh74,6941
-dragon_ml_toolbox-9.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-9.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-9.0.0.dist-info/RECORD,,
+dragon_ml_toolbox-9.1.0.dist-info/METADATA,sha256=NMMsbllyABDp8fVflbBWJ9aIQ0KemBI-3hBlj5JhE2E,6941
+dragon_ml_toolbox-9.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-9.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-9.1.0.dist-info/RECORD,,

ml_tools/ETL_engineering.py CHANGED Viewed

@@ -62,7 +62,7 @@ def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path])
     # --- 3. Process Each Column ---
     for i, column_name in enumerate(df.columns):
-        _LOGGER.info(f"Processing column: '{column_name}'...")
+        # _LOGGER.info(f"Processing column: '{column_name}'...")
         # --- Get unique values AS IS ---
         try:
@@ -96,7 +96,7 @@ def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path])
         except IOError:
             _LOGGER.exception(f"Error writing to file {file_path}.")
         else:
-            _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values to '{file_path}'")
+            _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values from '{column_name}'.")
     _LOGGER.info("Process complete.")

ml_tools/handle_excel.py CHANGED Viewed

@@ -167,49 +167,63 @@ def validate_excel_schema(
     strict: bool = False
 ) -> None:
     """
-    Validates that each Excel file in a directory conforms to the expected column schema.
+    Validates that each Excel file in a directory conforms to the expected column schema. Only the first worksheet of each file is analyzed.
     Parameters:
         target_dir (str | Path): Path to the directory containing Excel files.
         expected_columns (list[str]): List of expected column names.
         strict (bool): If True, columns must match exactly (names and order).
                       If False, columns must contain at least all expected names.
-    Returns:
-        List[str]: List of file paths that failed the schema validation.
     """
-    invalid_files: list[Path] = []
+    invalid_files: dict[str, str] = {}
     expected_set = set(expected_columns)
     target_path = make_fullpath(target_dir)
     excel_paths = find_excel_files(target_path)
     for file in excel_paths:
         try:
+            # Using first worksheet
             wb = load_workbook(file, read_only=True)
-            ws = wb.active  # Only check the first worksheet
+            ws = wb.active
             header = [cell.value for cell in next(ws.iter_rows(max_row=1))] # type: ignore
+            # Change 2: Detailed reason-finding logic
             if strict:
                 if header != expected_columns:
-                    invalid_files.append(file)
+                    header_set = set(header)
+                    reason_parts = []
+                    missing = sorted(list(expected_set - header_set)) # type: ignore
+                    extra = sorted(list(header_set - expected_set)) # type: ignore
+                    if missing:
+                        reason_parts.append(f"Missing: {missing}")
+                    if extra:
+                        reason_parts.append(f"Extra: {extra}")
+                    if not missing and not extra:
+                        reason_parts.append("Incorrect column order")
+                    invalid_files[file.name] = ". ".join(reason_parts)
             else:
                 header_set = set(header)
                 if not expected_set.issubset(header_set):
-                    invalid_files.append(file)
+                    missing_cols = sorted(list(expected_set - header_set)) # type: ignore
+                    reason = f"Missing required columns: {missing_cols}"
+                    invalid_files[file.name] = reason
         except Exception as e:
             _LOGGER.error(f"Error processing '{file}': {e}")
-            invalid_files.append(file)
+            invalid_files[file.name] = f"File could not be read. Error: {e}"
     valid_excel_number = len(excel_paths) - len(invalid_files)
     _LOGGER.info(f"{valid_excel_number} out of {len(excel_paths)} excel files conform to the schema.")
+    # Change 3: Updated print loop to show the reason
     if invalid_files:
         _LOGGER.warning(f"{len(invalid_files)} excel files are invalid:")
-        for in_file in invalid_files:
-            print(f"  - {in_file.name}")
+        for file_name, reason in invalid_files.items():
+            print(f"  - {file_name}: {reason}")
     return None

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-9.0.0.dist-info → dragon_ml_toolbox-9.1.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 9.0.0__py3-none-any.whl → 9.1.0__py3-none-any.whl