dragon-ml-toolbox 10.10.0__py3-none-any.whl → 10.11.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.10.0
3
+ Version: 10.11.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-10.10.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-10.10.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
1
+ dragon_ml_toolbox-10.11.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-10.11.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
3
3
  ml_tools/ETL_cleaning.py,sha256=lSP5q6-ukGhJBPV8dlsqJvPXAzj4du_0J-SbtEd0Pjg,19292
4
4
  ml_tools/ETL_engineering.py,sha256=a6KCWH6kRatZtjaFEF_o917ApPMK5_vRD-BjfCDAl-E,49400
5
5
  ml_tools/GUI_tools.py,sha256=kEQWg-bog3pB5tI22gMGKWaCGHnz9TB2Lvvfhf5F2CI,45412
@@ -15,7 +15,7 @@ ml_tools/ML_scaler.py,sha256=h2ymq5u953Lx60Qb38Y0mAWj85x9PbnP0xYNQ3pd8-w,7535
15
15
  ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
16
16
  ml_tools/PSO_optimization.py,sha256=q0VYpssQGbPum7xdnkDXlJQKhZMYZo8acHpKhajPK3c,22954
17
17
  ml_tools/RNN_forecast.py,sha256=8rNZr-eWOBXMiDQV22e_tQTPM5LM2IFggEAa1FaoXaI,1965
18
- ml_tools/SQL.py,sha256=givoz6CGWRUdqnBem3VGZxzGdo3ZbX00kyHNjzI8kWE,10803
18
+ ml_tools/SQL.py,sha256=rPeKywvwJ5oHYVUQUovO3OUkXQTxBT9Dvwb6E2ntphY,11233
19
19
  ml_tools/VIF_factor.py,sha256=MkMh_RIdsN2XUPzKNGRiEcmB17R_MmvGV4ezpL5zD2E,10403
20
20
  ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
21
21
  ml_tools/_logger.py,sha256=wcImAiXEZKPNcwM30qBh3t7HvoPURonJY0nrgMGF0sM,4719
@@ -28,9 +28,9 @@ ml_tools/ensemble_learning.py,sha256=3s0kH4i_naj0IVl_T4knst-Hwg4TScWjEdsXX5KAi7I
28
28
  ml_tools/handle_excel.py,sha256=He4UT15sCGhaG-JKfs7uYVAubxWjrqgJ6U7OhMR2fuE,14005
29
29
  ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
30
30
  ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
31
- ml_tools/path_manager.py,sha256=wLJlz3Y9_1-LB9em4B2VYDCVuTOX2eOc7D6hbbebjgM,14990
31
+ ml_tools/path_manager.py,sha256=CCZSlHpUiuaHsMAYcmMGZ9GvbHNbbrTqYFicgWz6pRs,17883
32
32
  ml_tools/utilities.py,sha256=30z0x1aDLyBGzF98_tgSaxwFafYwQS-GTFzXHopBSGc,29105
33
- dragon_ml_toolbox-10.10.0.dist-info/METADATA,sha256=hSrcYAuoE1H0uF77-8TClwrcdlQwg0f1BGixlh_Q0Wo,6969
34
- dragon_ml_toolbox-10.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- dragon_ml_toolbox-10.10.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
36
- dragon_ml_toolbox-10.10.0.dist-info/RECORD,,
33
+ dragon_ml_toolbox-10.11.0.dist-info/METADATA,sha256=dUnqRVopM0cM7AjWbhS2ife0tKObinp6J0vwUt-rJ-A,6969
34
+ dragon_ml_toolbox-10.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ dragon_ml_toolbox-10.11.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
36
+ dragon_ml_toolbox-10.11.0.dist-info/RECORD,,
ml_tools/SQL.py CHANGED
@@ -120,12 +120,14 @@ class DatabaseManager:
120
120
  if not self.cursor:
121
121
  _LOGGER.error("Database connection is not open.")
122
122
  raise sqlite3.Error()
123
+
124
+ sanitized_table_name = sanitize_filename(table_name)
123
125
 
124
126
  columns = ', '.join(f'"{k}"' for k in data.keys())
125
127
  placeholders = ', '.join(['?'] * len(data))
126
128
  values = list(data.values())
127
129
 
128
- query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
130
+ query = f'INSERT INTO "{sanitized_table_name}" ({columns}) VALUES ({placeholders})'
129
131
 
130
132
  self.cursor.execute(query, values)
131
133
 
@@ -187,6 +189,8 @@ class DatabaseManager:
187
189
  if not data:
188
190
  _LOGGER.warning("'insert_many' called with empty data list. No action taken.")
189
191
  return
192
+
193
+ sanitized_table_name = sanitize_filename(table_name)
190
194
 
191
195
  # Assume all dicts have the same keys as the first one
192
196
  first_row = data[0]
@@ -196,10 +200,10 @@ class DatabaseManager:
196
200
  # Create a list of tuples, where each tuple is a row of values
197
201
  values_to_insert = [list(row.values()) for row in data]
198
202
 
199
- query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
203
+ query = f'INSERT INTO "{sanitized_table_name}" ({columns}) VALUES ({placeholders})'
200
204
 
201
205
  self.cursor.executemany(query, values_to_insert)
202
- _LOGGER.info(f"➡️ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")
206
+ _LOGGER.info(f"➡️ Bulk inserted {len(values_to_insert)} rows into '{sanitized_table_name}'.")
203
207
 
204
208
  def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
205
209
  """
@@ -220,9 +224,11 @@ class DatabaseManager:
220
224
  if not self.conn:
221
225
  _LOGGER.error("Database connection is not open.")
222
226
  raise sqlite3.Error()
227
+
228
+ sanitized_table_name = sanitize_filename(table_name)
223
229
 
224
230
  df.to_sql(
225
- table_name,
231
+ sanitized_table_name,
226
232
  self.conn,
227
233
  if_exists=if_exists,
228
234
  index=False # Typically, we don't want to save the DataFrame index
@@ -248,9 +254,11 @@ class DatabaseManager:
248
254
  if not self.conn:
249
255
  _LOGGER.error("Database connection is not open.")
250
256
  raise sqlite3.Error()
257
+
258
+ sanitized_table_name = sanitize_filename(table_name)
251
259
 
252
260
  # PRAGMA is a special SQL command in SQLite for database metadata
253
- return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)
261
+ return pd.read_sql_query(f'PRAGMA table_info("{sanitized_table_name}");', self.conn)
254
262
 
255
263
  def create_index(self, table_name: str, column_name: str, unique: bool = False):
256
264
  """
@@ -269,11 +277,13 @@ class DatabaseManager:
269
277
  if not self.cursor:
270
278
  _LOGGER.error("Database connection is not open.")
271
279
  raise sqlite3.Error()
280
+
281
+ sanitized_table_name = sanitize_filename(table_name)
272
282
 
273
- index_name = f"idx_{table_name}_{column_name}"
283
+ index_name = f"idx_{sanitized_table_name}_{column_name}"
274
284
  unique_clause = "UNIQUE" if unique else ""
275
285
 
276
- query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"
286
+ query = f'CREATE {unique_clause} INDEX IF NOT EXISTS "{index_name}" ON "{sanitized_table_name}" ("{column_name}")'
277
287
 
278
288
  _LOGGER.info(f"➡️ Executing: {query}")
279
289
  self.cursor.execute(query)
ml_tools/path_manager.py CHANGED
@@ -23,13 +23,33 @@ class PathManager:
23
23
  "path database". It supports both development mode and applications
24
24
  bundled with Pyinstaller or Nuitka.
25
25
 
26
- Supports python dictionary syntax.
26
+ All keys provided to the manager are automatically sanitized to ensure
27
+ they are valid Python identifiers. This allows for clean, attribute-style
28
+ access. The sanitization process involves replacing whitespace with
29
+ underscores and removing special characters.
27
30
  """
28
31
  def __init__(
29
32
  self,
30
33
  anchor_file: str,
31
34
  base_directories: Optional[List[str]] = None
32
35
  ):
36
+ """
37
+ Sets up the core paths for a project by anchoring to a specific file.
38
+
39
+ The manager automatically registers a 'ROOT' path, which points to the
40
+ root of the package, and can pre-register common subdirectories found
41
+ directly within that root.
42
+
43
+ Args:
44
+ anchor_file (str): The path to a file within your package, typically
45
+ the `__file__` of the script where PathManager
46
+ is instantiated. This is used to locate the
47
+ package root directory.
48
+ base_directories (List[str] | None): An optional list of strings,
49
+ where each string is the name
50
+ of a subdirectory to register
51
+ relative to the package root.
52
+ """
33
53
  resolved_anchor_path = Path(anchor_file).resolve()
34
54
  self._package_name = resolved_anchor_path.parent.name
35
55
  self._is_bundled, bundle_root = self._get_bundle_root()
@@ -43,13 +63,17 @@ class PathManager:
43
63
  package_root = resolved_anchor_path.parent
44
64
 
45
65
  # Register the root of the package itself
46
- self._paths["ROOT"] = package_root
66
+ self.ROOT = package_root
47
67
 
48
68
  # Register all the base directories
49
69
  if base_directories:
50
70
  for dir_name in base_directories:
51
- # This logic works for both dev mode and bundled mode
52
- self._paths[dir_name] = package_root / dir_name
71
+ sanitized_dir_name = self._sanitize_key(dir_name)
72
+ self._check_underscore_key(sanitized_dir_name)
73
+ setattr(self, sanitized_dir_name, package_root / sanitized_dir_name)
74
+
75
+ # Signal that initialization is complete.
76
+ self._initialized = True
53
77
 
54
78
  def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
55
79
  """
@@ -72,47 +96,35 @@ class PathManager:
72
96
  # --- Not Bundled ---
73
97
  else:
74
98
  return False, None
99
+
100
+ def _check_underscore_key(self, key: str) -> None:
101
+ if key.startswith("_"):
102
+ _LOGGER.error(f"Path key '{key}' cannot start with underscores.")
103
+ raise ValueError()
75
104
 
76
- def get(self, key: str) -> Path:
77
- """
78
- Retrieves a stored path by its key.
79
-
80
- Args:
81
- key (str): The key of the path to retrieve.
82
-
83
- Returns:
84
- Path: The resolved, absolute Path object.
85
-
86
- Raises:
87
- KeyError: If the key is not found in the manager.
88
- """
89
- try:
90
- return self._paths[key]
91
- except KeyError:
92
- _LOGGER.error(f"Path key '{key}' not found.")
93
- raise
94
-
95
- def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
105
+ def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
96
106
  """
97
- Adds new paths or overwrites existing ones in the manager.
107
+ Adds new paths in the manager.
98
108
 
99
109
  Args:
100
110
  new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
101
111
  the identifiers and values are the
102
- Path objects or strings to store.
103
- overwrite (bool): If False (default), raises a KeyError if any
104
- key in new_paths already exists. If True,
105
- allows overwriting existing keys.
112
+ Path objects to store.
106
113
  """
107
- if not overwrite:
108
- for key in new_paths:
109
- if key in self._paths:
110
- _LOGGER.error(f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True.")
111
- raise KeyError
112
-
113
- # Resolve any string paths to Path objects before storing
114
- resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
115
- self._paths.update(resolved_new_paths)
114
+ # Pre-check
115
+ for key in new_paths:
116
+ sanitized_key = self._sanitize_key(key)
117
+ self._check_underscore_key(sanitized_key)
118
+ if hasattr(self, sanitized_key):
119
+ _LOGGER.error(f"Cannot add path for key '{sanitized_key}' ({key}): an attribute with this name already exists.")
120
+ raise KeyError()
121
+
122
+ # If no conflicts, add new paths
123
+ for key, value in new_paths.items():
124
+ self.__setattr__(key, value)
125
+
126
+ def _sanitize_key(self, key: str):
127
+ return sanitize_filename(key)
116
128
 
117
129
  def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
118
130
  """
@@ -147,7 +159,7 @@ class PathManager:
147
159
  if path.suffix: # It's a file, not a directory
148
160
  continue
149
161
 
150
- # --- THE CRITICAL CHECK ---
162
+ # --- CRITICAL CHECK ---
151
163
  # Determine if the path is inside the main application package.
152
164
  is_internal_path = package_root and path.is_relative_to(package_root)
153
165
 
@@ -186,15 +198,20 @@ class PathManager:
186
198
  # --- Dictionary-Style Methods ---
187
199
  def __getitem__(self, key: str) -> Path:
188
200
  """Allows dictionary-style getting, e.g., PM['my_key']"""
189
- return self.get(key)
201
+ return self.__getattr__(key)
190
202
 
191
203
  def __setitem__(self, key: str, value: Union[str, Path]):
192
- """Allows dictionary-style setting, does not allow overwriting, e.g., PM['my_key'] = path"""
193
- self.update({key: value}, overwrite=False)
204
+ """Allows dictionary-style setting, e.g., PM['my_key'] = path"""
205
+ sanitized_key = self._sanitize_key(key)
206
+ self._check_underscore_key(sanitized_key)
207
+ self.__setattr__(sanitized_key, value)
194
208
 
195
209
  def __contains__(self, key: str) -> bool:
196
210
  """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
197
- return key in self._paths
211
+ sanitized_key = self._sanitize_key(key)
212
+ true_false = sanitized_key in self._paths
213
+ # print(f"key {sanitized_key} in current path dictionary keys: {true_false}")
214
+ return true_false
198
215
 
199
216
  def __len__(self) -> int:
200
217
  """Allows getting the number of paths, e.g., len(PM)"""
@@ -211,6 +228,47 @@ class PathManager:
211
228
  def items(self):
212
229
  """Returns all registered (key, Path) pairs."""
213
230
  return self._paths.items()
231
+
232
+ def __getattr__(self, name: str) -> Path:
233
+ """
234
+ Allows attribute-style access to paths, e.g., PM.data.
235
+ """
236
+ # Block access to private attributes
237
+ if name.startswith('_'):
238
+ _LOGGER.error(f"Access to private attribute '{name}' is not allowed, remove leading underscore.")
239
+ raise AttributeError()
240
+
241
+ sanitized_name = self._sanitize_key(name)
242
+
243
+ try:
244
+ # Look for the key in our internal dictionary
245
+ return self._paths[sanitized_name]
246
+ except KeyError:
247
+ # If not found, raise an AttributeError
248
+ _LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
249
+ raise AttributeError()
250
+
251
+ def __setattr__(self, name: str, value: Union[str, Path]):
252
+ """Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
253
+ # Check for internal attributes
254
+ if name.startswith('_'):
255
+ if hasattr(self, '_initialized') and self._initialized:
256
+ self._check_underscore_key(name)
257
+ return
258
+ else:
259
+ # During initialization, allow private attributes to be set.
260
+ super().__setattr__(name, value)
261
+ return
262
+
263
+ # Block overwriting of existing methods/attributes
264
+ sanitized_name = self._sanitize_key(name)
265
+ self._check_underscore_key(sanitized_name)
266
+ if hasattr(self, sanitized_name):
267
+ _LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
268
+ raise AttributeError()
269
+
270
+ # If all checks pass, treat it as a public path.
271
+ self._paths[sanitized_name] = Path(value)
214
272
 
215
273
 
216
274
  def make_fullpath(