advanced-excel 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ """
2
+ advanced_excel package (standard Python package name).
3
+
4
+ This makes the library fully importable and compatible after `pip install -e .`
5
+ or `pip install advanced-excel`.
6
+
7
+ Recommended import:
8
+ from advanced_excel import AdvancedExcel
9
+
10
+ The main class is AdvancedExcel (CamelCase is standard for classes).
11
+ Implementation is in core.py (the thin class + mixin imports) and the various
12
+ *Mixin modules.
13
+ """
14
+
15
+ from .core import AdvancedExcel
16
+ from .core import ROW_INDEX, COL_INDEX, DATA, __version__
17
+
18
+ __all__ = ["AdvancedExcel", "ROW_INDEX", "COL_INDEX", "DATA", "__version__"]
19
+
@@ -0,0 +1,171 @@
1
class BlockTableMixin:
    """
    Mixin for detecting and extracting multiple tables / blocks / entities
    inside a single sheet (repeated key sections, "Batch Number" style blocks, etc.).
    This is the core of the "advanced" multi-table Excel handling.
    """

    def get_all_tables(self, _sheet, mincol=3):
        """
        Identify and extract table-like structures from a sheet.

        A table is a run of contiguous rows that each hold at least `mincol`
        non-NaN values. The first row holding fewer than `mincol` values ends
        the current run; a run that reaches the bottom of the sheet is closed
        there. Every run is turned into a DataFrame by `_getTable`.

        Rows are addressed by position, so the sheet's index labels do not
        need to be the default 0..n-1 range.

        Args:
            _sheet (pandas.DataFrame): The sheet to search for tables within.
            mincol (int, optional): The minimum number of non-NaN values required
                for a row to be considered part of a table. Defaults to 3.

        Returns:
            list: A list of pandas DataFrames, one per table found, in sheet
                order. Empty if no row qualifies.
        """
        # One boolean per row position: does the row hold enough values?
        row_in_table = (_sheet.notna().sum(axis=1) >= mincol).tolist()

        all_tables = []
        start = None  # position of the first row of the table being scanned
        for position, in_table in enumerate(row_in_table):
            if in_table:
                if start is None:
                    start = position
            elif start is not None:
                all_tables.append(self._getTable(_sheet, start, position))
                start = None

        # A table that runs up to the last row has no terminating sparse row.
        if start is not None:
            all_tables.append(self._getTable(_sheet, start, len(row_in_table)))

        return all_tables

    @staticmethod
    def _iter_block_bounds(starts, total):
        """Pair each start position with the next one (or `total` for the last)."""
        starts = list(starts)
        return zip(starts, starts[1:] + [total])

    def get_dataframe_blocks_by_key_name(self, df, key_name):
        """
        Split a sheet into row blocks that each begin with a repeated key.

        Used for sheets that contain several blocks of information stacked
        vertically. The rows holding `key_name` are located with
        `self.get_all_rows_from_key` (provided elsewhere in the composed class;
        each returned item is read through its ``"row_index"`` entry). Every
        block spans from one such row up to, but excluding, the next one; the
        last block runs to the end of the sheet. Rows and columns that are
        entirely NaN are removed from each block and its index is reset.

        Args:
            df (pandas.DataFrame): The sheet to split.
            key_name: The key passed to `get_all_rows_from_key`.

        Returns:
            list: A list of pandas DataFrames, one per block. Empty if no row
                is identified.
        """
        identified_rows = self.get_all_rows_from_key(df, key_name)
        row_starts = [row["row_index"] for row in identified_rows]

        blocks = []
        for ini, end in self._iter_block_bounds(row_starts, df.shape[0]):
            block = df.iloc[ini:end].dropna(axis=1, how="all").dropna(axis=0, how="all")
            blocks.append(block.reset_index(drop=True))
        return blocks

    def get_dataframe_blocks_by_key_column(self, df, key_column):
        """
        Split a DataFrame into column blocks that each begin at a repeated column name.

        Used for sheets that contain several blocks of information laid out
        side by side. Every column whose name equals `key_column` starts a new
        block, which spans up to (but excludes) the next such column; the last
        block runs to the final column. Rows and columns that are entirely NaN
        are removed from each block and its index is reset.

        Args:
            df (pandas.DataFrame): The DataFrame to split.
            key_column: The column name that marks the beginning of each block.

        Returns:
            list: A list of pandas DataFrames, one per block. Empty if no
                column is named `key_column`.
        """
        column_starts = self._getAllColumnsFromKey(df, key_column)

        blocks = []
        for ini, end in self._iter_block_bounds(column_starts, df.shape[1]):
            block = df.iloc[:, ini:end].dropna(axis=0, how="all").dropna(axis=1, how="all")
            blocks.append(block.reset_index(drop=True))
        return blocks

    def _getAllColumnsFromKey(self, df, key_column):
        """
        Return the positions of all columns named `key_column`.

        Args:
            df (pandas.DataFrame): The DataFrame to search.
            key_column: The column name to search for.

        Returns:
            list: Integer positions of the matching columns, in column order.
                Empty if there is no match.
        """
        return [position for position, column in enumerate(df.columns) if column == key_column]

    def _getTable(self, _sheet, init_table, end_table):
        """
        Extract a table from a sheet within the given row positions.

        The rows `init_table` (inclusive) to `end_table` (exclusive) are
        sliced by position, rows and columns that are entirely NaN are removed,
        and the first remaining row becomes the header: each value is converted
        to ``str``, stripped, upper-cased, and has its spaces replaced by
        underscores. The header row is then removed and the index reset.

        Args:
            _sheet (pandas.DataFrame): The sheet to extract the table from.
            init_table (int): The starting row position (inclusive).
            end_table (int): The ending row position (exclusive).

        Returns:
            pandas.DataFrame: A new DataFrame holding the table; `_sheet` is
                not modified. If the slice holds no data at all, the empty
                cleaned frame is returned.
        """
        table = (
            _sheet.iloc[init_table:end_table].dropna(axis=1, how="all").dropna(axis=0, how="all")
        )
        if table.empty:
            # Nothing to promote to a header.
            return table.reset_index(drop=True)

        table.columns = [str(s).strip().upper().replace(" ", "_") for s in table.iloc[0]]
        # Drop the header row by position: the slice keeps the sheet's row
        # labels, so label 0 only exists when the table starts at the first row.
        return table.iloc[1:].reset_index(drop=True)

    def _headerColumnsAreEmpty(self, columns):
        """
        Check whether all column names start with "Unnamed:".

        Args:
            columns: Column names exposing the pandas ``.str`` accessor
                (e.g. ``df.columns``).

        Returns:
            bool: True if all column names start with "Unnamed:", False otherwise.
        """
        return columns.str.startswith("Unnamed:").all()
@@ -0,0 +1,420 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
class CleaningMixin:
    """
    Mixin for value/string/NaN/header/dtype cleaning and transformation operations.
    Includes replace*, strip, case, rename, numeric conversion helpers, etc.
    """

    @staticmethod
    def _resolve_columns_to_clean(df, columns_to_clean, columns_not_to_clean):
        """
        Turn an include/exclude pair into a concrete list of columns.

        Returns None when neither list names any column (meaning "the whole
        frame"); `columns_to_clean` wins when both are given.
        """
        if columns_to_clean:
            return list(columns_to_clean)
        if columns_not_to_clean:
            return [col for col in df.columns if col not in columns_not_to_clean]
        return None

    def replace_nan_in_column_by_nan_strings(self, df, columnName):
        """
        Replace NaN values in one column with the string "NA".

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            columnName (str): The name of the column in which to replace NaN values.

        Returns:
            pandas.DataFrame: The same DataFrame (the modification is done in place).
        """
        df.loc[:, columnName] = df[columnName].fillna("NA")
        return df

    def replace_nan_strings(self, df):
        """
        Replace string representations of NaN with actual NaN values.

        The exact cell values 'NA_NA', 'NA', 'NAN', 'na_na', 'na' and 'nan'
        are replaced; every other cell is kept.

        Args:
            df (pandas.DataFrame): The DataFrame to clean.

        Returns:
            pandas.DataFrame: A new DataFrame with the same index and columns.
                It is rebuilt from a NumPy array, so the input is not modified.
        """
        nan_markers = ["NA_NA", "NA", "NAN", "na_na", "na", "nan"]
        cleaned_values = np.where(df.isin(nan_markers), np.nan, df)
        return pd.DataFrame(cleaned_values, df.index, df.columns)

    def strip_all(self, df):
        """
        Remove leading and trailing whitespace from all string values.

        Only the two ends of each string are trimmed, so strings that contain
        line breaks in the middle are handled as well.

        Args:
            df (pandas.DataFrame): The DataFrame to clean.

        Returns:
            pandas.DataFrame: A new DataFrame with the strings stripped; the
                input is not modified.
        """
        # Anchored alternatives instead of a capture group: `.` does not match
        # a newline, so a "capture everything" pattern skips multi-line cells.
        return df.replace(to_replace=r"^\s+|\s+$", value="", regex=True)

    def remove_all_crlf(self, df, byValue=""):
        """
        Replace carriage return and line feed characters in string values.

        Every `\\r` and `\\n` is replaced by `byValue`. When `byValue` is not
        empty, each doubled occurrence of `byValue` is then reduced to a single
        one (so a `\\r\\n` pair yields one `byValue`).

        Args:
            df (pandas.DataFrame): The DataFrame to clean.
            byValue (str, optional): The replacement for CR/LF characters.
                Defaults to ''.

        Returns:
            pandas.DataFrame: A new DataFrame with CR/LF characters replaced;
                the input is not modified.
        """
        df = df.replace({"\n": byValue, "\r": byValue}, regex=True)
        if byValue:
            # Escape so that values such as "." or "|" are matched literally.
            df = df.replace({re.escape(byValue) * 2: byValue}, regex=True)
        return df

    def replace_by_dictionary(self, df, dictionary, column_to_apply):
        """
        Replace values in one column using a dictionary mapping.

        The micro symbol (µ) is replaced by 'u' before the lookup. Values with
        no entry in `dictionary` keep their original content (including any µ).

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            dictionary (dict): Maps values to their replacements.
            column_to_apply (str): The name of the column to apply the mapping to.
                The column must support the ``.str`` accessor.

        Returns:
            pandas.DataFrame: The same DataFrame (the modification is done in place).
        """
        original = df[column_to_apply]
        mapped = original.str.replace("µ", "u", regex=False).map(dictionary)
        df.loc[:, column_to_apply] = mapped.fillna(original)
        return df

    def replace_all(
        self, df, to_replace, replace_by, columns_to_clean=None, columns_not_to_clean=None
    ):
        """
        Replace all matches of a regular expression with another value.

        Column targeting:

        - If both `columns_to_clean` and `columns_not_to_clean` are None or
          empty, the replacement is applied to *all* columns.
        - If `columns_to_clean` is provided, the replacement is applied *only*
          to the listed columns.
        - Otherwise, if `columns_not_to_clean` is provided, the replacement is
          applied to *all* columns *except* the listed ones.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            to_replace (str): The regular expression to search for.
            replace_by (str): The replacement value.
            columns_to_clean (list, optional): Columns to apply the replacement
                to. Defaults to None.
            columns_not_to_clean (list, optional): Columns to *exclude* from
                the replacement. Defaults to None.

        Returns:
            pandas.DataFrame: With no column selection, a new DataFrame. With a
                column selection, the same DataFrame: the targeted columns are
                converted to ``str`` and overwritten in place.
        """
        targets = self._resolve_columns_to_clean(df, columns_to_clean, columns_not_to_clean)
        if targets is None:
            return df.replace(to_replace=to_replace, value=replace_by, regex=True)

        for col in targets:
            # Cast first so that non-string cells also take part in the replacement.
            df[col] = df[col].astype(str).replace(
                to_replace=to_replace, value=replace_by, regex=True
            )
        return df

    def replace_all_by_list(self, df, to_replace, columns_to_clean=None, columns_not_to_clean=None):
        """
        Apply a list of (pattern, replacement) pairs one after the other.

        Each pair is passed to `replace_all`, so later pairs see the result of
        earlier ones.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            to_replace (list of tuples): (pattern, replacement) pairs; the
                pattern is a regular expression.
            columns_to_clean (list, optional): Columns to apply the
                replacements to. Defaults to None.
            columns_not_to_clean (list, optional): Columns to *exclude* from
                the replacements. Defaults to None.

        Returns:
            pandas.DataFrame: The DataFrame returned by the last `replace_all`
                call, or `df` itself if the list is empty.
        """
        for pattern, replacement in to_replace:
            df = self.replace_all(df, pattern, replacement, columns_to_clean, columns_not_to_clean)
        return df

    def replace_spaces_by_separator(
        self, df, separator=";", columns_to_clean=None, columns_not_to_clean=None
    ):
        """
        Replace runs of whitespace with a separator and collapse repeated separators.

        Every run of whitespace becomes one `separator`; afterwards every run
        of consecutive separators is reduced to a single one. The separator is
        matched literally, so regex metacharacters and multi-character
        separators are safe. Column targeting follows the same rules as
        `replace_all` (None or empty lists mean all columns).

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            separator (str, optional): The separator to insert. Defaults to ';'.
            columns_to_clean (list, optional): Columns to apply the replacement
                to. Defaults to None.
            columns_not_to_clean (list, optional): Columns to *exclude* from
                the replacement. Defaults to None.

        Returns:
            pandas.DataFrame: With no column selection, a new DataFrame. With a
                column selection, the same DataFrame: the targeted columns are
                converted to ``str`` and overwritten in place.
        """
        repeated_separator = "(?:" + re.escape(separator) + "){2,}" if separator else None

        def normalise(data):
            data = data.replace(to_replace=r"\s+", value=separator, regex=True)
            if repeated_separator is not None:
                data = data.replace(to_replace=repeated_separator, value=separator, regex=True)
            return data

        targets = self._resolve_columns_to_clean(df, columns_to_clean, columns_not_to_clean)
        if targets is None:
            return normalise(df)

        for col in targets:
            df[col] = normalise(df[col].astype(str))
        return df

    def transform_as_numeric(self, df, columnsToApply=None, columnsToAvoid=None, errors="coerce"):
        """
        Convert columns to numeric values with `pandas.to_numeric`.

        Column targeting:

        - If both `columnsToApply` and `columnsToAvoid` are None, *all* columns
          are converted.
        - If `columnsToApply` is provided, *only* the listed columns are converted.
        - Otherwise, if `columnsToAvoid` is provided, *all* columns *except*
          the listed ones are converted.
        - If only empty lists are given, nothing is converted.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            columnsToApply (list, optional): Columns to convert. Defaults to None.
            columnsToAvoid (list, optional): Columns to *exclude* from the
                conversion. Defaults to None.
            errors (str, optional): Passed to `pandas.to_numeric`. The default
                'coerce' turns non-numeric values into NaN.

        Returns:
            pandas.DataFrame: The same DataFrame; the converted columns are
                overwritten in place.
        """
        if columnsToApply is None and columnsToAvoid is None:
            columns_to_convert = df.columns
        elif columnsToApply:
            columns_to_convert = columnsToApply
        elif columnsToAvoid:
            columns_to_convert = [col for col in df.columns if col not in columnsToAvoid]
        else:
            return df  # only empty lists were supplied: nothing to convert

        # The conversion is applied row by row (axis=1), as it always has been.
        df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors=errors, axis=1)
        return df

    def fill_columns_with_left_value(self, df, index_row):
        """
        Fill NaN cells of one row with the nearest value to their left.

        The row is walked from left to right; each null cell receives the last
        non-null value seen. Cells before the first non-null value receive
        None. This is useful for header rows that came from merged cells.

        Args:
            df (pandas.DataFrame): The DataFrame to read.
            index_row (int): The position of the row to fill, counted after
                the index has been reset.

        Returns:
            pandas.DataFrame: A new DataFrame with a reset index and the row
                filled; the input is not modified.
        """
        df = df.reset_index(drop=True)
        left_value = None
        # Cells are addressed by position so duplicated column names stay unambiguous.
        for position in range(df.shape[1]):
            current_value = df.iloc[index_row, position]
            if pd.isnull(current_value):
                df.iloc[index_row, position] = left_value
            else:
                left_value = current_value
        return df

    def rename_headers(self, df, dictionary):
        """
        Rename columns using a dictionary mapping.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            dictionary (dict): Maps old column names to new column names.

        Returns:
            pandas.DataFrame: The same DataFrame (the modification is done in place).
        """
        df.rename(columns=dictionary, inplace=True)
        return df

    def rename_headers_by_regexp(self, df, regexp, value=""):
        """
        Rename columns with a regular expression substitution.

        Every match of `regexp` in a column name is replaced by `value`.
        Column labels that are not strings are left unchanged.

        Args:
            df (pandas.DataFrame): The DataFrame whose columns are renamed.
            regexp (str): The regular expression to search for.
            value (str, optional): The replacement value. Defaults to ''.

        Returns:
            pandas.DataFrame: A new DataFrame with the columns renamed; the
                input is not modified.
        """
        pattern = re.compile(regexp)

        def substitute(label):
            return pattern.sub(value, label) if isinstance(label, str) else label

        return df.rename(columns=substitute)

    def rename_headers_by_regexp_list(self, df, listOfRegExp, value=""):
        """
        Rename columns with several regular expression substitutions in turn.

        Each expression in `listOfRegExp` is applied through
        `rename_headers_by_regexp` with the same `value`.

        Args:
            df (pandas.DataFrame): The DataFrame whose columns are renamed.
            listOfRegExp (list): The regular expressions to apply, in order.
            value (str, optional): The replacement value. Defaults to ''.

        Returns:
            pandas.DataFrame: A new DataFrame with the columns renamed, or `df`
                itself if the list is empty.
        """
        for regexp in listOfRegExp:
            df = self.rename_headers_by_regexp(df, regexp, value)
        return df

    def case_headers(self, df, uppercase=True):
        """
        Convert column names to uppercase or lowercase.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            uppercase (bool, optional): Uppercase when True, lowercase when
                False. Defaults to True.

        Returns:
            pandas.DataFrame: The same DataFrame (the modification is done in place).
        """
        if uppercase:
            df.columns = df.columns.str.upper()
        else:
            df.columns = df.columns.str.lower()
        return df

    def case_column_values(self, df, columnName=None, title=False, uppercase=True):
        """
        Convert the values of one column to title case, uppercase, or lowercase.

        NaN values are first replaced by the string "NA" and the values are
        stripped of leading/trailing whitespace. Then title case is applied if
        `title` is True; otherwise uppercase or lowercase according to
        `uppercase`. Nothing happens when `columnName` is None.

        Args:
            df (pandas.DataFrame): The DataFrame to modify.
            columnName (str, optional): The column to modify. Defaults to None.
            title (bool, optional): Whether to convert to title case. Defaults to False.
            uppercase (bool, optional): Whether to convert to uppercase (only
                used when `title` is False). Defaults to True.

        Returns:
            pandas.DataFrame: The same DataFrame (the modification is done in place).
        """
        if columnName is None:
            return df

        df = self.replace_nan_in_column_by_nan_strings(df, columnName)
        df.loc[:, columnName] = df[columnName].str.strip()

        if title:
            df.loc[:, columnName] = df[columnName].str.title()
        elif uppercase:
            df.loc[:, columnName] = df[columnName].str.upper()
        else:
            df.loc[:, columnName] = df[columnName].str.lower()
        return df