dragon-ml-toolbox 9.0.0__tar.gz → 9.1.0__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (40)
  1. {dragon_ml_toolbox-9.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-9.1.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ETL_engineering.py +2 -2
  4. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/handle_excel.py +26 -12
  5. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/pyproject.toml +1 -1
  6. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/LICENSE +0 -0
  7. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  8. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/README.md +0 -0
  9. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  10. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  11. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  12. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  13. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/GUI_tools.py +0 -0
  14. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/MICE_imputation.py +0 -0
  15. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_callbacks.py +0 -0
  16. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_datasetmaster.py +0 -0
  17. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_evaluation_multi.py +0 -0
  19. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_scaler.py +0 -0
  23. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ML_trainer.py +0 -0
  24. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/PSO_optimization.py +0 -0
  25. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/RNN_forecast.py +0 -0
  26. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/SQL.py +0 -0
  27. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/keys.py +0 -0
  37. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/optimization_tools.py +0 -0
  38. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/path_manager.py +0 -0
  39. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/ml_tools/utilities.py +0 -0
  40. {dragon_ml_toolbox-9.0.0 → dragon_ml_toolbox-9.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 9.0.0
3
+ Version: 9.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 9.0.0
3
+ Version: 9.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -62,7 +62,7 @@ def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path])
62
62
 
63
63
  # --- 3. Process Each Column ---
64
64
  for i, column_name in enumerate(df.columns):
65
- _LOGGER.info(f"Processing column: '{column_name}'...")
65
+ # _LOGGER.info(f"Processing column: '{column_name}'...")
66
66
 
67
67
  # --- Get unique values AS IS ---
68
68
  try:
@@ -96,7 +96,7 @@ def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path])
96
96
  except IOError:
97
97
  _LOGGER.exception(f"Error writing to file {file_path}.")
98
98
  else:
99
- _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values to '{file_path}'")
99
+ _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values from '{column_name}'.")
100
100
 
101
101
  _LOGGER.info("Process complete.")
102
102
 
@@ -167,49 +167,63 @@ def validate_excel_schema(
167
167
  strict: bool = False
168
168
  ) -> None:
169
169
  """
170
- Validates that each Excel file in a directory conforms to the expected column schema.
170
+ Validates that each Excel file in a directory conforms to the expected column schema. Only the first worksheet of each file is analyzed.
171
171
 
172
172
  Parameters:
173
173
  target_dir (str | Path): Path to the directory containing Excel files.
174
174
  expected_columns (list[str]): List of expected column names.
175
175
  strict (bool): If True, columns must match exactly (names and order).
176
176
  If False, columns must contain at least all expected names.
177
-
178
- Returns:
179
- List[str]: List of file paths that failed the schema validation.
180
177
  """
181
- invalid_files: list[Path] = []
178
+ invalid_files: dict[str, str] = {}
182
179
  expected_set = set(expected_columns)
183
180
 
184
181
  target_path = make_fullpath(target_dir)
185
-
186
182
  excel_paths = find_excel_files(target_path)
187
183
 
188
184
  for file in excel_paths:
189
185
  try:
186
+ # Using first worksheet
190
187
  wb = load_workbook(file, read_only=True)
191
- ws = wb.active # Only check the first worksheet
188
+ ws = wb.active
192
189
 
193
190
  header = [cell.value for cell in next(ws.iter_rows(max_row=1))] # type: ignore
194
191
 
192
+ # Change 2: Detailed reason-finding logic
195
193
  if strict:
196
194
  if header != expected_columns:
197
- invalid_files.append(file)
195
+ header_set = set(header)
196
+ reason_parts = []
197
+ missing = sorted(list(expected_set - header_set)) # type: ignore
198
+ extra = sorted(list(header_set - expected_set)) # type: ignore
199
+
200
+ if missing:
201
+ reason_parts.append(f"Missing: {missing}")
202
+ if extra:
203
+ reason_parts.append(f"Extra: {extra}")
204
+ if not missing and not extra:
205
+ reason_parts.append("Incorrect column order")
206
+
207
+ invalid_files[file.name] = ". ".join(reason_parts)
198
208
  else:
199
209
  header_set = set(header)
200
210
  if not expected_set.issubset(header_set):
201
- invalid_files.append(file)
211
+ missing_cols = sorted(list(expected_set - header_set)) # type: ignore
212
+ reason = f"Missing required columns: {missing_cols}"
213
+ invalid_files[file.name] = reason
202
214
 
203
215
  except Exception as e:
204
216
  _LOGGER.error(f"Error processing '{file}': {e}")
205
- invalid_files.append(file)
217
+ invalid_files[file.name] = f"File could not be read. Error: {e}"
206
218
 
207
219
  valid_excel_number = len(excel_paths) - len(invalid_files)
208
220
  _LOGGER.info(f"{valid_excel_number} out of {len(excel_paths)} excel files conform to the schema.")
221
+
222
+ # Change 3: Updated print loop to show the reason
209
223
  if invalid_files:
210
224
  _LOGGER.warning(f"{len(invalid_files)} excel files are invalid:")
211
- for in_file in invalid_files:
212
- print(f" - {in_file.name}")
225
+ for file_name, reason in invalid_files.items():
226
+ print(f" - {file_name}: {reason}")
213
227
 
214
228
  return None
215
229
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "9.0.0"
3
+ version = "9.1.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }