dragon-ml-toolbox 12.6.0__py3-none-any.whl → 12.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/RECORD +7 -7
- ml_tools/utilities.py +49 -0
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.6.0.dist-info → dragon_ml_toolbox-12.7.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-12.
|
|
2
|
-
dragon_ml_toolbox-12.
|
|
1
|
+
dragon_ml_toolbox-12.7.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
+
dragon_ml_toolbox-12.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
|
|
3
3
|
ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
|
|
4
4
|
ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
|
|
5
5
|
ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
|
|
@@ -34,8 +34,8 @@ ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,88
|
|
|
34
34
|
ml_tools/optimization_tools.py,sha256=ewYMAdSGlFxYALAGFXn-MsHpvW_Sbx6I-sKg9Kp6rB8,13533
|
|
35
35
|
ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
|
|
36
36
|
ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
|
|
37
|
-
ml_tools/utilities.py,sha256=
|
|
38
|
-
dragon_ml_toolbox-12.
|
|
39
|
-
dragon_ml_toolbox-12.
|
|
40
|
-
dragon_ml_toolbox-12.
|
|
41
|
-
dragon_ml_toolbox-12.
|
|
37
|
+
ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
|
|
38
|
+
dragon_ml_toolbox-12.7.0.dist-info/METADATA,sha256=Ek5m6-ExduWUlSvDJ26AYySjU5aILoyiUVlP5dDYWyY,6166
|
|
39
|
+
dragon_ml_toolbox-12.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
dragon_ml_toolbox-12.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
41
|
+
dragon_ml_toolbox-12.7.0.dist-info/RECORD,,
|
ml_tools/utilities.py
CHANGED
|
@@ -12,6 +12,7 @@ from ._logger import _LOGGER
|
|
|
12
12
|
# Keep track of available tools
|
|
13
13
|
__all__ = [
|
|
14
14
|
"load_dataframe",
|
|
15
|
+
"load_dataframe_greedy",
|
|
15
16
|
"yield_dataframes_from_dir",
|
|
16
17
|
"merge_dataframes",
|
|
17
18
|
"save_dataframe_filename",
|
|
@@ -124,6 +125,54 @@ def load_dataframe(
|
|
|
124
125
|
return df, df_name # type: ignore
|
|
125
126
|
|
|
126
127
|
|
|
128
|
+
def load_dataframe_greedy(directory: Union[str, Path],
                          use_columns: Optional[list[str]] = None,
                          all_strings: bool = False,
                          verbose: bool = True) -> pd.DataFrame:
    """
    Greedily loads the first found CSV file from a directory into a Pandas DataFrame.

    This function scans the specified directory for any CSV files. It will
    load the *first* CSV file reported by `list_csv_paths` using the
    `load_dataframe` function as a Pandas DataFrame. Any other CSV files in
    the directory are ignored.

    Args:
        directory (str, Path):
            The path to the directory to search for a CSV file.
        use_columns (list[str] | None):
            A list of column names to load. If None, all columns are loaded.
        all_strings (bool):
            If True, loads all columns as string data types.
        verbose (bool):
            Forwarded to `load_dataframe`. The directory scan itself always
            runs with `verbose=False`.

    Returns:
        pd.DataFrame:
            A pandas DataFrame loaded from the first CSV file found.

    Raises:
        FileNotFoundError:
            If the specified directory does not exist, no CSV file is found
            in it, or the CSV file path found is invalid.
        ValueError:
            If the loaded DataFrame is empty or `use_columns` contains
            invalid column names.
    """
    # validate directory
    dir_path = make_fullpath(directory, enforce="directory")

    # list all csv files; only the first one reported is used
    csv_dict = list_csv_paths(directory=dir_path, verbose=False)

    # Guard against an empty mapping: without it the function would fall
    # through to `return df` with `df` unbound and raise UnboundLocalError.
    first_csv_path = next(iter(csv_dict.values()), None)
    if first_csv_path is None:
        raise FileNotFoundError(f"No CSV files found in directory '{dir_path}'.")

    df, _df_name = load_dataframe(df_path=first_csv_path,
                                  use_columns=use_columns,
                                  kind="pandas",
                                  all_strings=all_strings,
                                  verbose=verbose)

    return df
|
|
174
|
+
|
|
175
|
+
|
|
127
176
|
def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True):
|
|
128
177
|
"""
|
|
129
178
|
Iterates over all CSV files in a given directory, loading each into a Pandas DataFrame.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|